├── .gitignore
├── ChangeLog.md
├── LICENSE
├── README.md
├── Setup.hs
├── bin
├── make-pkg
└── test-all
├── docs
├── IntPair_submachine.png
├── MyChar_ATN.png
├── PExpression_ATN.png
├── PTuple_1.png
├── PTuple_2.png
├── PTuple_3.png
├── PTuple_4.png
└── pads_to_atn_conversion.md
├── lang_demo.hs
├── package.yaml
├── src
├── Data
│ └── Set
│ │ └── Monad.hs
├── Language
│ ├── ANTLR4.hs
│ └── ANTLR4
│ │ ├── Boot
│ │ ├── Parser.hs.boot
│ │ ├── Quote.hs
│ │ ├── SplicedParser.hs
│ │ └── Syntax.hs
│ │ ├── FileOpener.hs
│ │ ├── G4.hs
│ │ ├── Parser.hs
│ │ ├── Parser.hs.bak
│ │ ├── Regex.hs.boot
│ │ └── Syntax.hs
└── Text
│ └── ANTLR
│ ├── Allstar.hs
│ ├── Allstar
│ ├── ATN.hs
│ ├── ParserGenerator.hs
│ └── Stacks.hs
│ ├── Common.hs
│ ├── Grammar.hs
│ ├── LL1.hs
│ ├── LR.hs
│ ├── Language.hs
│ ├── Lex.hs
│ ├── Lex
│ ├── Automata.hs
│ ├── DFA.hs
│ ├── NFA.hs
│ ├── Regex.hs
│ └── Tokenizer.hs
│ ├── MultiMap.hs
│ ├── Parser.hs
│ ├── Pretty.hs
│ └── Set.hs
├── stack.yaml
└── test
├── GrammarReader
├── GrammarReader.hs
├── output
│ └── generated_atn_lookup.txt
└── sample_grammars
│ └── sample_bnf_grammar.txt
├── allstar
├── AllStarTests.hs
├── ConvertDFA.hs
├── ConvertP.hs
├── Main.hs
└── README.md
├── atn
├── ATN.hs
└── Main.hs
├── c
├── C.g4
├── CParser.hs
└── Main.hs
├── chisel
├── Language
│ └── Chisel
│ │ ├── Grammar.hs
│ │ ├── Parser.hs
│ │ └── Syntax.hs
└── Main.hs
├── coreg4
├── G4.hs
├── G4Fast.hs
├── G4Parser.hs
├── Hello.hs
├── HelloParser.hs
└── Main.hs
├── g4
├── DoubleSemi.hs
├── DoubleSemiP.hs
├── Empty.hs
├── EmptyP.hs
├── Main.hs
├── Optional.hs
└── OptionalParser.hs
├── lexer
└── Main.hs
├── ll
└── Main.hs
├── lr
├── EOF.hs
├── EOFGrammar.hs
├── GLRInc.hs
├── GLRIncGrammar.hs
├── GLRPartial.hs
├── GLRPartialGrammar.hs
└── Main.hs
├── sexpression
├── Grammar.hs
├── Parser.hs
└── sexpression.hs
├── shared-hunit
└── Text
│ └── ANTLR
│ └── HUnit.hs
├── shared
└── Example
│ └── Grammar.hs
├── simple
├── Grammar.hs
└── Main.hs
├── swift
├── Grammar.hs
├── Parser.hs
├── Swift.g4
└── swift.hs
├── template
└── Main.hs
├── unit
├── Main.hs
└── PlusBug0.hs
└── unit0
├── DupTerms.hs
├── DupTermsGrammar.hs
├── Main.hs
├── Star0.hs
├── Star0Grammar.hs
├── Star1.hs
└── Star1Grammar.hs
/.gitignore:
--------------------------------------------------------------------------------
1 | /*.local
2 | /stack.yaml.lock
3 | cabal.config
4 | antlr-haskell.cabal
5 | /books
6 | .stack-work/
7 | dist
8 | /dist-*
9 | cabal-dev
10 | *.o
11 | *.hi
12 | *.chi
13 | *.chs.h
14 | *.dyn_o
15 | *.dyn_hi
16 | .virtualenv
17 | .hpc
18 | .hsenv
19 | .cabal-sandbox/
20 | cabal.sandbox.config
21 | *.prof
22 | *.aux
23 | *.hp
24 | *~
25 |
--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | * April 14, 2018: Moved to hpack's package.yaml format instead of native cabal
4 | file.
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2017-2018, Karl Cronburg & Sam Lasser
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * Neither the name of Karl Cronburg, Sam Lasser, nor the names of other
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # antlr-haskell
2 | A Haskell implementation of ANTLR.
3 |
4 | In implementing ANTLR we referenced the behavior of the original Java version
5 | (ANTLR4):
6 | [The definitive ANTLR4 Reference.](https://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference)
7 | However we have taken much liberty in the design of this library compared to the
8 | workflow of the original Java version. In particular in implementing ANTLR for
9 | Haskell we have followed the following principles:
10 |
11 | - Parsing backends should be interchangeable
12 | - GLR, LR, SLR, LL, ALL(\*)
13 | - Code should be first class and declarative
14 | - The implementation of G4 is metacircular
15 | - Regular expressions are interpreted
16 | - Implement algorithms from first principles
17 | - Set notation is used in implementing LL and LR algorithms.
18 | - Pure functional implementations of parsing algorithms can eventually support
19 | embedding of arbitrary (including IO) actions without breaking the predictive
20 | parsing abstraction.
21 |
22 | More info can be found here:
23 | [https://www.cronburg.com/2018/antlr-haskell-project/](https://www.cronburg.com/2018/antlr-haskell-project/)
24 |
25 | ## Build instructions
26 |
27 | The library can be built with:
28 |
29 | ```bash
30 | $ stack build # stack version 2.3.3
31 | $ stack test :simple
32 | ```
33 |
34 | Or with cabal (tested on 3.0.0.0) like so:
35 |
36 | ```bash
37 | $ hpack
38 | $ cabal update
39 | $ cabal configure
40 | $ cabal new-build
41 | $ cabal test sexpression
42 | ...
43 | Test suite sexpression: RUNNING...
44 | Test suite sexpression: PASS
45 | Test suite logged to:
46 | /antlr-haskell/dist-newstyle/build/x86_64-linux/ghc-8.6.5/antlr-haskell-0.1.0.1/t/sexpression/test/antlr-haskell-0.1.0.1-sexpression.log
47 | 1 of 1 test suites (1 of 1 test cases) passed.
48 | ```
49 |
50 | Here's a good one to run when making changes to the library, and you're unsure
51 | of what may become affected by those changes:
52 |
53 | ```bash
54 | stack test :simple :atn :ll :lr :sexpression :allstar :c
55 | ```
56 |
57 | And then compare the results with that of this upstream branch. Some of the
58 | GLR features (incremental and partial tokenization, notably) are still experimental,
59 | and so there are known test cases which currently fail.
60 |
61 | ## Version History
62 |
63 | - September 25th, 2020. Released version 0.1.0.1: bug fixes, documentation, and
64 | library versioning updates.
65 |
66 |
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 |
--------------------------------------------------------------------------------
/bin/make-pkg:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | SCRIPTDIR="$(cd "`dirname $0`"; pwd)"
3 | GITDIR="$(dirname "$SCRIPTDIR")"
4 |
5 | # =============================================================================
6 | # MODIFY THESE PARAMETERS EACH TIME YOU PUBLISH A NEW VERSION:
7 | arch=x86_64-linux-tinfo6
8 | cVersion=3.0.1.0
9 | pVersion=0.1.0.1
10 | # =============================================================================
11 |
12 | gzDir=$GITDIR/.stack-work/dist/$arch/Cabal-$cVersion
13 | gzFile=antlr-haskell-$pVersion.tar.gz
14 |
15 | echo "Changing to directory $GITDIR"
16 | cd "$GITDIR"
17 |
18 | # Generates .stack-work/dist/[arch]/Cabal-[version]/antlr-haskell-[version].tar.gz :
19 | echo "Generating $gzFile"
20 | stack sdist --pvp-bounds both
21 |
22 | # Generates .stack-work/dist/[arch]/Cabal-[version]/doc/html/antlr-haskell/* :
23 | echo "Generating haddock documentation"
24 | stack haddock --haddock-hyperlink-source # --no-haddock-deps
25 |
26 | # Copy source gzipped tar file to dist:
27 | mkdir -p dist
28 | cp $gzDir/$gzFile dist/
29 |
30 | # Copy over the haddock documentation:
31 | destDir=dist/antlr-haskell-$pVersion/antlr-haskell-$pVersion-docs
32 | mkdir -p $destDir
33 | cp -r $gzDir/doc/html/antlr-haskell $destDir
34 |
35 | # Generate tar file to upload to hackage:
36 | echo "Making haddock tar file for *manual* upload to https://hackage.haskell.org/package/antlr-haskell-$pVersion/maintain/docs"
37 | tarFile=antlr-haskell-$pVersion-docs.tar
38 | tar --format=ustar -cvf dist/$tarFile $destDir | grep index.html
39 |
40 |
--------------------------------------------------------------------------------
/bin/test-all:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # && stack test antlr-haskell:Stacks \
4 |
5 | OPTS=--ghc-options="-ddump-splices"
6 |
7 | #TESTS="simple template atn coreg4 ll lr lexer g4 sexpression simple allstar chisel"
8 | TESTS=$(cd test
9 | for f in *; do
10 | if ! [[ "$f" = "shared"* ]]; then
11 | echo -n "$f "
12 | fi
13 | done)
14 | failures=""
15 | echo "Running tests: $TESTS"
16 |
17 | for t in $TESTS; do
18 | stack test antlr-haskell:$t $OPTS
19 | if [ $? -eq 1 ]; then
20 | count=$((count + 1))
21 | failures="$t $failures"
22 | fi
23 | done
24 | echo "Total number of failures: $count"
25 | echo "Failed on: " $failures
26 |
27 |
--------------------------------------------------------------------------------
/docs/IntPair_submachine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/IntPair_submachine.png
--------------------------------------------------------------------------------
/docs/MyChar_ATN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/MyChar_ATN.png
--------------------------------------------------------------------------------
/docs/PExpression_ATN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PExpression_ATN.png
--------------------------------------------------------------------------------
/docs/PTuple_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_1.png
--------------------------------------------------------------------------------
/docs/PTuple_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_2.png
--------------------------------------------------------------------------------
/docs/PTuple_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_3.png
--------------------------------------------------------------------------------
/docs/PTuple_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_4.png
--------------------------------------------------------------------------------
/lang_demo.hs:
--------------------------------------------------------------------------------
1 | {-#LANGUAGE TypeFamilies #-}
2 | module LangDemo where
3 |
4 | -- If you never change the base value and are only adding expressions
5 | data BaseVal = IntV Int
6 |
7 | data BaseExp = ValE BaseVal
8 | | AddE BaseExp BaseExp
9 |
10 | data ExtExp = BaseE BaseExp
11 | | SubE ExtExp ExtExp
12 |
13 |
14 |
15 | class Lang lang where
16 | eval :: lang -> BaseVal
17 |
18 | instance Lang BaseExp where
19 | eval (ValE i) = i
20 | eval (AddE be1 be2) =
21 | let IntV i1 = eval be1
22 | IntV i2 = eval be2
23 | in IntV $ i1 + i2
24 |
25 | instance Lang ExtExp where
26 | eval (BaseE b) = eval b
27 | eval (SubE ee1 ee2) =
28 | let IntV i1 = eval ee1
29 | IntV i2 = eval ee2
30 | in IntV $ i1 - i2
31 |
32 | -- if you want to be able to change the set of values you can evaluate to when extending the language
33 |
34 | data BaseVal2 = IntV2 Int
35 |
36 | data BaseExp2 = ValBE2 BaseVal2
37 | | AddE2 BaseExp2 BaseExp2
38 |
39 | data ExtVal2 = StringV2 String
40 |
41 | data ExtExp2 = BaseE2 BaseExp2
42 | | ValEE2 ExtVal2
43 | | ConcatE2 ExtExp2 ExtExp2
44 |
45 | -- can use type families to suspend the value kind until later.
46 | data family Val2
47 |
48 | class Lang2 lang where
49 | eval :: lang -> Val2
50 |
51 | -- define at some time t1 for the base language
52 | data instance Val2 = BV2 BaseVal2
53 |
54 | instance Lang2 BaseExp2 where
55 | eval ValBE2 bv2 = BV2 bv2
56 | eval AddE2 be1 be2 =
57 | let IntV2 i1 = eval be1
58 | IntV2 i2 = eval be2
59 | in BV2 $ IntV2 $ i1 + i2
60 |
61 |
62 | -- extend at some later time t2 for the extended language
63 | data instance Val2 = EV2 ExtVal2
64 |
65 | instance Lang2 ExtExp2 where
66 | eval BaseE2 be2 = eval be2
67 | eval ValEE2 ev2 = ev2
68 | eval ConcatE2 ee1 ee2 =
69 | case (eval ee1, eval ee2) of
70 | (StringV2 s1, StringV2 s2) -> EV2 $ StringV2 $ s1 ++ s2
71 | _ -> error "trying to concat something other than strings!"
72 |
73 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4.hs:
--------------------------------------------------------------------------------
1 | {-|
2 | Module : Language.ANTLR4
3 | Description : Primary entrypoint for top-level antlr-haskell users
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 | -}
10 | module Language.ANTLR4 (
11 | -- * Functions
12 | -- | Compile-time support for expanding LR-specific data types:
13 | mkLRParser
14 | -- | Other basic functions used in generated code:
15 | , (&&&)
16 | -- * Module exports
17 | -- | Most importantly for the Grammar type so that the quasiquoter can generate
18 | -- new grammar itself:
19 | , module Text.ANTLR.Grammar
20 | -- | Supporting data types and instances so that the spliced AST translator
21 | -- functions can talk about parse events, tokens, and EOF:
22 | , module Text.ANTLR.Parser
23 | -- | Regular expressions used during tokenization, as opposed to
24 | -- 'Language.ANTLR4.Regex' which are regexes used for G4 parsing:
25 | , module Text.ANTLR.Lex.Regex
26 | -- | The G4 quasiquoter and accompanying grammar:
27 | , g4, g4_parsers
28 | -- | For defining pretty-printable instances of quasiquoter-generated data types:
29 | , module Text.ANTLR.Pretty
30 | -- | Tokenizer:
31 | , module T
32 | -- * Type exports
33 | -- | Typeclass instances for quasiquoter-generated data types:
34 | , Hashable(..), Generic(..), Data(..), Lift(..)
35 | -- | Parser interface data types:
36 | , S.Set(..), T.Token(..), LRResult(..)
37 | -- | Grammar interface data types:
38 | , Directive(..), PRHS(..), TermAnnot(..)
39 | )
40 | where
41 |
42 | import Text.ANTLR.Grammar
43 | import Text.ANTLR.Parser
44 |
45 | import Text.ANTLR.LR as LR
46 | import Text.ANTLR.Lex.Tokenizer as T
47 | import Text.ANTLR.Set as S
48 |
49 | import Text.ANTLR.Set (Hashable(..), Generic(..))
50 | import Text.ANTLR.Pretty
51 | import Control.Arrow ( (&&&) )
52 | import Text.ANTLR.Lex.Regex
53 |
54 | import Language.ANTLR4.G4
55 | import Language.ANTLR4.Parser
56 | import Language.ANTLR4.Boot.Quote (mkLRParser, g4_parsers)
57 |
58 | import Data.Data (Data(..))
59 | import Language.Haskell.TH.Lift (Lift(..))
60 |
61 | import Language.ANTLR4.Boot.Syntax
62 | (Directive(..), PRHS(..), TermAnnot(..), ProdElem(..))
63 |
64 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/Boot/Parser.hs.boot:
--------------------------------------------------------------------------------
1 | {-#LANGUAGE QuasiQuotes, TemplateHaskell#-}
2 | {-|
3 | Module : Language.ANTLR4.Boot.Parser
4 | Description : ANTLR4 boot parser written with parsec
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | -}
11 | module Language.ANTLR4.Boot.Parser where
12 | -- syntax (Exp)
13 | import Language.Haskell.TH
14 | import Language.Haskell.TH.Syntax
15 | import qualified Debug.Trace as D (trace, traceM)
16 |
17 | import qualified Language.Haskell.Meta as LHM
18 |
19 | -- monadic ops
20 | import Control.Monad (mapM)
21 | -- parsec
22 | import Text.ParserCombinators.Parsec
23 | import qualified Text.Parsec.String as PS
24 | import qualified Text.Parsec.Prim as PP
25 | import qualified Text.Parsec.Token as PT
26 | import qualified Text.Parsec.Expr as PE
27 | import qualified Text.Parsec.Combinator as PC
28 | import Text.ParserCombinators.Parsec.Language (haskellStyle, reservedOpNames, reservedNames
29 | , commentLine, commentStart, commentEnd)
30 | import Text.ParserCombinators.Parsec.Pos (newPos)
31 | -- text munging
32 | import Data.Char
33 |
34 | import Language.ANTLR4.Boot.Syntax
35 | import Language.ANTLR4.Regex (parseRegex, regexP)
36 |
37 | --traceM s = D.traceM ("[ANTLR4.Boot.Parser] " ++ s)
38 | traceM = return
39 |
40 | ------------------------------------------------------------------------------
41 | -- Or-Try Combinator (tries two parsers, one after the other)
42 | (<||>) a b = try a <|> try b
43 |
44 | parseANTLR :: SourceName -> Line -> Column -> String -> Either ParseError [G4]
45 | parseANTLR fileName line column input =
46 | PP.parse result fileName input
47 | where
48 |
49 | result = do
50 | setPosition (newPos fileName line column)
51 | whiteSpace
52 | x <- gExps
53 | traceM $ show x
54 | eof <|> errorParse
55 | return x
56 |
57 | errorParse = do
58 | rest <- manyTill anyToken eof
59 | unexpected $ '"' : rest ++ "\""
60 |
61 | gExps :: PS.Parser [G4]
62 | gExps = concat <$> many1 gExp
63 |
64 | gExp :: PS.Parser [G4]
65 | gExp = do
66 | traceM "gExp"
67 | whiteSpace
68 | xs <- grammarP <||> lexerP <||> prodP
69 | traceM $ show xs
70 | return xs
71 |
72 | grammarP :: PS.Parser [G4]
73 | grammarP = do
74 | reserved "grammar"
75 | h <- upper
76 | t <- manyTill anyToken (reservedOp ";")
77 | traceM $ show $ Grammar (h : t)
78 | return [Grammar (h : t)]
79 |
80 | -- | Assumptions:
81 | --
82 | -- * Directives must be on a single line.
83 | -- *
84 | prodP :: PS.Parser [G4]
85 | prodP = do
86 | h <- lower
87 | t <- manyTill anyChar (reservedOp ":")
88 | traceM $ "[prodP] " ++ trim (h : t)
89 | rhsList <- sepBy1 rhsP (traceM "rhsList..." >> reservedOp "|")
90 | traceM $ "[prodP.rhsList] " ++ show rhsList
91 | reservedOp ";"
92 | traceM "prodP returning..."
93 | return [Prod (trim (h : t)) (concat rhsList)]
94 | where
95 | rhsP = do
96 | mPred <- optionMaybe predP
97 | traceM $ "[rhsP0] " ++ show mPred
98 | alphaList <- many alphaP
99 | traceM $ "[rhsP] " ++ show alphaList
100 | mMute <- optionMaybe muteP
101 | pDir <- optionMaybe directiveP
102 | whiteSpace
103 | return [PRHS alphaList mPred mMute pDir]
104 | alphaP = termP <||> nonTermP
105 | termP = do
106 | whiteSpace
107 | char '\''
108 | traceM "[prodP.termP.s] "
109 | s <- manyTill anyChar $ char '\''
110 | whiteSpace
111 | traceM $ "[prodP.termP.s] " ++ show s
112 | return $ GTerm NoAnnot s
113 | nonTermP = do
114 | s <- identifier
115 | traceM $ "[nonTermP] " ++ s
116 | return $ GNonTerm NoAnnot s
117 | predP = do
118 | traceM "[predP]"
119 | reservedOp "{"
120 | haskellParseExpTill "}?"
121 | muteP = do
122 | traceM "[muteP]"
123 | reservedOp "{"
124 | haskellParseExpTill "}"
125 | directiveP = do
126 | whiteSpace
127 | symbol "->"
128 | whiteSpace
129 | str <- manyTill anyChar (char '\n')
130 | whiteSpace
131 | traceM $ "[directiveP]" ++ show str
132 | return (toDirective $ trim $ str)
133 |
134 | -- TODO: not use getInput
135 | rEOF = do
136 | y <- getInput
137 | return (case y of
138 | '-':'>':_ -> True
139 | ';':_ -> True
140 | _ -> False)
141 |
142 | toDirective [] = LowerD []
143 | toDirective s@(h:rst)
144 | | isUpper h = UpperD s
145 | | otherwise = LowerD s
146 |
147 | lexerP :: PS.Parser [G4]
148 | lexerP = do
149 | mAnnot <- optionMaybe annot
150 | h <- upper
151 | t <- manyTill anyChar (reservedOp ":")
152 | traceM $ "Lexeme Name: " ++ (h:t)
153 | r <- regexP rEOF
154 | traceM $ "Regex: " ++ show r
155 | optionMaybe $ symbol "->"
156 | mDir <- optionMaybe $ manyTill anyToken (reservedOp ";")
157 | return $ [Lex mAnnot (trim (h : t)) (LRHS r (toDirective <$> trim <$> mDir))]
158 | where
159 | annot = fragment -- <||> ....
160 | fragment = do
161 | reserved "fragment"
162 | return Fragment
163 |
164 | -- Parser combinators end
165 | haskellParseExpTill :: String -> PS.Parser Exp
166 | haskellParseExpTill op = do {
167 | _ <- whiteSpace
168 | ; str <- manyTill anyChar (reservedOp op)
169 | ; haskellParseExp str
170 | }
171 | haskellParseExp :: String -> PS.Parser Exp
172 | haskellParseExp str =
173 | case LHM.parseExp str of
174 | Left err -> error err -- PP.parserZero
175 | Right expTH -> return expTH
176 |
177 | whiteSpaceOrComment = comment <||> whiteSpace
178 | where
179 | comment = do
180 | whiteSpace
181 | reservedOp "//"
182 | (manyTill anyChar $ try $ string "\n") <||> (manyTill anyChar $ try $ string "\r")
183 | return ()
184 |
185 | ------------------------------------------------------------------------------
186 | -- Lexer
187 | lexer :: PT.TokenParser ()
188 | lexer = PT.makeTokenParser $ haskellStyle
189 | { reservedOpNames = [";", "|", ":", "{", "}", "}?", "'"]
190 | , reservedNames = ["grammar"]
191 | , commentLine = "//"
192 | , commentStart = "/*"
193 | , commentEnd = "*/"
194 | }
195 |
196 | whiteSpace = PT.whiteSpace lexer
197 | identifier = PT.identifier lexer
198 | operator = PT.operator lexer
199 | reserved = PT.reserved lexer
200 | reservedOp = PT.reservedOp lexer
201 | charLiteral = PT.charLiteral lexer
202 | stringLiteral = PT.stringLiteral lexer
203 | integer = PT.integer lexer
204 | natural = PT.natural lexer
205 | commaSep1 = PT.commaSep1 lexer
206 | parens = PT.parens lexer
207 | braces = PT.braces lexer
208 | brackets = PT.brackets lexer
209 | symbol = PT.symbol lexer
210 |
211 | expr = PE.buildExpressionParser table term
212 | > "expression"
213 | term = natural
214 | > "simple expression"
215 | table = [ [prefix "-" negate, prefix "+" id ] ]
216 | prefix name fun = PE.Prefix $ reservedOp name >> return fun
217 |
218 | -- http://stackoverflow.com/a/6270382
219 | trim xs = dropSpaceTail "" $ dropWhile isSpace xs
220 |
221 | dropSpaceTail maybeStuff "" = ""
222 | dropSpaceTail maybeStuff (x:xs)
223 | | isSpace x = dropSpaceTail (x:maybeStuff) xs
224 | | null maybeStuff = x : dropSpaceTail "" xs
225 | | otherwise = reverse maybeStuff ++ x : dropSpaceTail "" xs
226 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/Boot/Syntax.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveLift, DeriveAnyClass, DeriveGeneric, OverloadedStrings #-}
2 | {-|
3 | Module : Language.ANTLR4.Boot.Syntax
4 | Description : Both the boot and core syntax data types for G4
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | -}
11 | module Language.ANTLR4.Boot.Syntax
12 | ( G4(..), PRHS(..), ProdElem(..), GAnnot(..)
13 | , Directive(..)
14 | , LRHS(..), Regex(..), isGTerm, isGNonTerm
15 | , TermAnnot(..), isMaybeAnnot, isNoAnnot, annot
16 | , prodElemSymbol
17 | ) where
18 | import Text.ANTLR.Grammar ()
19 | import Language.Haskell.TH.Lift (Lift(..))
20 |
21 | import Language.Haskell.TH.Syntax (Exp)
22 | import qualified Language.Haskell.TH.Syntax as S
23 |
24 | import Text.ANTLR.Set ( Hashable(..), Generic(..) )
25 | import Text.ANTLR.Pretty -- (rshow, Prettify(..), pStr, pshow, pStr')
26 |
27 | -- | .g4 style syntax representation
28 | data G4 = -- | Grammar name declaration in g4
29 | Grammar { gName :: String -- ^ Name
30 | }
31 | -- | One or more g4 productions
32 | | Prod { pName :: String -- ^ Production's name
33 | , patterns :: [PRHS] -- ^ List of rules to match on
34 | }
35 | -- | A single, possibly annotated, g4 lexical rule
36 | | Lex { annotation :: Maybe GAnnot -- ^ Lexical annotation (@fragment@)
37 | , lName :: String -- ^ Lexical rule name
38 | , pattern :: LRHS -- ^ The regex to match on
39 | }
40 | deriving (Show, Eq, Lift, Generic, Hashable)
41 |
42 | instance Prettify G4 where
43 | prettify (Grammar gn) = do
44 | pStr "grammar "
45 | pStr' gn
46 | prettify (Prod n ps) = do
47 | pStr' n
48 | pStr " -> "
49 | incrIndent $ length n + 4
50 | pListLines ps
51 | incrIndent $ 0 - (length n + 4)
52 | prettify (Lex annot ln (LRHS regex dir)) = do
53 | pStr' ln
54 | pStr " -> "
55 | incrIndent $ length ln + 4
56 | prettify regex
57 | incrIndent $ 0 - (length ln + 4)
58 | pStr "("
59 | prettify dir
60 | pStr ")"
61 |
62 |
63 | instance Lift Exp
64 |
65 | -- | The right-hand side of a G4 production rule.
66 | data PRHS = PRHS
67 | { alphas :: [ProdElem] -- ^ In-order list of elements defining this rule
68 | , pred :: Maybe Exp -- ^ Arbitrary boolean predicate to test whether or not this rule should fire
69 | , mutator :: Maybe Exp -- ^ Arbitrary mutator to run when this rule fires
70 | , pDirective :: Maybe Directive -- ^ How to construct a Haskell type when this rules fires
71 | } deriving (Show, Eq, Lift, Generic)
72 |
73 | instance Prettify PRHS where
74 | prettify (PRHS as pred mut pDir) = do
75 | prettify as
76 | pStr "("
77 | pStr' $ show pred; pStr ","
78 | pStr' $ show mut; pStr ","
79 | prettify pDir; pStr ")"
80 |
81 | -- | Antiquoted (or g4-embedded) string that goes to the right of an arrow in
82 | -- a g4 production rule. This specifies how to construct a Haskell type.
83 | data Directive =
84 | UpperD String -- ^ Probably a Haskell data constructor
85 | | LowerD String -- ^ Probably just a Haskell function to call
86 | | HaskellD String -- ^ Arbitrary antiquoted Haskell code embedded in the G4 grammar
87 | deriving (Show, Eq, Ord, Lift, Generic, Hashable)
88 |
89 | instance Prettify Directive where prettify = rshow
90 |
91 | instance Hashable PRHS where
92 | hashWithSalt salt prhs = salt `hashWithSalt` alphas prhs
93 |
94 | -- | Annotations on a term (nonterminal or terminal) for extending our G4
95 | -- BNF-like syntax with regular expression modifiers.
96 | data TermAnnot =
97 | Regular Char -- ^ Regular expression modifier (e.g. +, ?, *)
98 | | NoAnnot -- ^ Term is not annotated with anything
99 | deriving (Show, Eq, Ord, Lift, Generic, Hashable)
100 |
101 | instance Prettify TermAnnot where
102 | prettify NoAnnot = return ()
103 | prettify (Regular c) = pStr' [c]
104 |
105 | -- | Get the annotation from a 'ProdElem'
106 | annot :: ProdElem -> TermAnnot
107 | annot (GTerm a _) = a
108 | annot (GNonTerm a _) = a
109 |
110 | -- | Is this 'TermAnnot' a maybe?
111 | isMaybeAnnot :: TermAnnot -> Bool
112 | isMaybeAnnot (Regular '?') = True
113 | isMaybeAnnot _ = False
114 |
115 | -- | Does this 'TermAnnot' have no annotation?
116 | isNoAnnot :: TermAnnot -> Bool
117 | isNoAnnot NoAnnot = True
118 | isNoAnnot _ = False
119 |
120 | -- | A single production element with any accompanying regex annotation
121 | data ProdElem =
122 | GTerm TermAnnot String -- ^ G4 terminal
123 | | GNonTerm TermAnnot String -- ^ G4 nonterminal
124 | deriving (Show, Eq, Ord, Lift, Generic, Hashable)
125 |
126 | instance Prettify ProdElem where
127 | prettify (GTerm annot s) = do
128 | pStr' s
129 | prettify annot
130 | prettify (GNonTerm annot s) = do
131 | pStr' s
132 | prettify annot
133 |
134 | prodElemSymbol (GTerm _ s) = s
135 | prodElemSymbol (GNonTerm _ s) = s
136 |
137 | -- | Is this a terminal G4 element?
138 | isGTerm (GTerm _ _) = True
139 | isGTerm _ = False
140 |
141 | -- | Is this a nonterminal G4 element?
142 | isGNonTerm (GNonTerm _ _) = True
143 | isGNonTerm _ = False
144 |
145 | -- | Allowable annotations on a lexical production rule
146 | data GAnnot = Fragment -- ^ For now the only annotation is @fragment@.
147 | deriving (Show, Eq, Lift, Generic, Hashable)
148 |
149 | -- | Right-hand side of a lexical G4 rule
150 | data LRHS = LRHS
151 | { regex :: Regex Char -- ^ A regular expression over characters as tokens.
152 | , directive :: Maybe Directive -- ^ Optional directive: @Nothing@ is equivalent to @(Just "String")@.
153 | }
154 | deriving (Show, Eq, Lift, Generic, Hashable)
155 |
156 | -- | G4 representation of a regex (G4 regex syntax, not regexs used by tokenizer)
157 | data Regex s =
158 | Epsilon -- ^ Consume no input
159 | | Literal [s] -- ^ Match on a literal string (sequence of characters)
160 | | Union [Regex s] -- ^ Match on any
161 | | Concat [Regex s] -- ^ Match in sequence
162 | | Kleene (Regex s) -- ^ Match zero or more times
163 | | PosClos (Regex s) -- ^ Match one or more times
164 | | Question (Regex s) -- ^ Match zero or one time.
165 | | CharSet [s] -- ^ Match once on any of the characters
166 | | Negation (Regex s) -- ^ Match anything that doesn't match this
167 | | Named String -- ^ A reference to some other regex (need to track an environment)
168 | deriving (Lift, Eq, Show, Generic, Hashable)
169 | -- TODO: Lex regexs (e.g. complement sets, escape chars, ...)
170 | -- TODO: Set s, and ranges of characters
171 |
172 | instance (Show s, Prettify s) => Prettify (Regex s) where
173 | prettify Epsilon = pStr "ε"
174 | prettify (Literal ss) = do
175 | pStr "\""
176 | mapM prettify ss
177 | pStr "\""
178 | prettify rest = pStr' $ show rest
179 |
180 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/FileOpener.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables,
2 | OverloadedStrings #-}
3 | {-|
4 | Module : Language.ANTLR4.FileOpener
5 | Description : Quasiquoter for reading files by name at compile time
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | Just do the following. It'll make sense:
13 |
14 | @
15 | foo = id
16 | file_contents = [open| test/file.foo |]
17 | @
18 | -}
19 | module Language.ANTLR4.FileOpener (
20 | -- * File opening quasiquoter
21 | open
22 | ) where
23 | import qualified Language.Haskell.TH as TH
24 | import Language.Haskell.TH.Syntax (Exp(..), addDependentFile)
25 | import Language.Haskell.TH.Quote (QuasiQuoter(..))
26 |
27 | import Data.Text (strip, splitOn, pack, unpack)
28 |
29 | -- | A quasiquoter for opening a file on disk, reading its contents, and running
30 | -- a function by the same name as the file extension. e.g.:
31 | --
32 | -- @
33 | -- foo = id
34 | -- file_contents = [open| test/file.foo |]
35 | -- @
36 | --
37 | -- @foo@ gets called on the contents of files with the extension @.foo@.
38 | open :: QuasiQuoter
39 | open = QuasiQuoter
40 | { quoteExp = openExp
41 | , quotePat = error "parse pattern"
42 | , quoteType = error "parse type"
43 | , quoteDec = error "parse decl?"
44 | }
45 |
46 | -- | Reads a file and runs a function with the name of the file extension,
47 | -- returning the result for use by a quasiquoter.
48 | openExp :: String -> TH.Q TH.Exp
49 | openExp s = let
50 | fn = unpack $ strip $ pack s
51 | ext = unpack $ last $ splitOn "." $ pack fn
52 | in do
53 | file_contents <- TH.runIO (readFile fn)
54 | addDependentFile fn
55 | [| $(return $ TH.VarE $ TH.mkName ext) file_contents |]
56 |
57 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/G4.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable
4 | , TemplateHaskell #-}
5 | {-|
6 | Module : Language.ANTLR4.G4
7 | Description : Core G4 quasiquoter for antlr-haskell
8 | Copyright : (c) Karl Cronburg, 2018
9 | License : BSD3
10 | Maintainer : karl@cs.tufts.edu
11 | Stability : experimental
12 | Portability : POSIX
13 |
14 | Until better haddock integration is developed, you'll need to look
15 | at the source for this module to see the G4 grammar for G4.
16 | -}
17 | module Language.ANTLR4.G4 where
18 |
19 | import Control.Arrow ( (&&&) )
20 | import Data.Char (isUpper)
21 |
22 | import Text.ANTLR.Common
23 | import Text.ANTLR.Grammar
24 | import Text.ANTLR.Parser
25 | import qualified Text.ANTLR.LR as LR
26 | import Text.ANTLR.Lex.Tokenizer as T
27 | import qualified Text.ANTLR.Set as S
28 | import Text.ANTLR.Set (Hashable(..), Generic(..))
29 | import Text.ANTLR.Pretty
30 | import Text.ANTLR.Lex.Regex (regex2dfa)
31 | import Data.Data (Data(..))
32 | import Language.Haskell.TH.Lift (Lift(..))
33 |
34 | import Language.Haskell.TH.Quote (QuasiQuoter(..))
35 | import qualified Language.Haskell.TH as TH
36 | import Language.ANTLR4.Boot.Quote (antlr4)
37 | import Language.ANTLR4.Syntax
38 | import qualified Language.ANTLR4.Boot.Syntax as G4S
39 | import qualified Language.ANTLR4.Boot.Quote as G4Q
40 |
41 | import Debug.Trace as D
42 |
43 | char :: String -> Char
44 | char = head
45 |
46 | append :: String -> String -> String
47 | append = (++)
48 |
49 | list a = [a]
50 | cons = (:)
51 | lexemeDirective r d = G4S.LRHS r (Just d)
52 | lexemeNoDir r = G4S.LRHS r Nothing
53 | lexDecl = G4S.Lex Nothing
54 | lexFragment = G4S.Lex (Just G4S.Fragment)
55 |
56 | literalRegex :: String -> G4S.Regex Char
57 | literalRegex = G4S.Literal
58 |
59 | prodDirective as d = G4S.PRHS as Nothing Nothing (Just d)
60 | prodNoDir as = G4S.PRHS as Nothing Nothing Nothing
61 | prodNoAlphas d = G4S.PRHS [] Nothing Nothing (Just d)
62 | prodNothing = G4S.PRHS [] Nothing Nothing Nothing
63 |
64 | list2 a b = [a,b]
65 | range a b = [a .. b]
66 |
67 | gterm = G4S.GTerm G4S.NoAnnot
68 | gnonTerm = G4S.GNonTerm G4S.NoAnnot
69 |
70 | maybeGTerm = G4S.GTerm (G4S.Regular '?')
71 | maybeGNonTerm = G4S.GNonTerm (G4S.Regular '?')
72 |
73 | starGTerm = G4S.GTerm (G4S.Regular '*')
74 | starGNonTerm = G4S.GNonTerm (G4S.Regular '*')
75 |
76 | plusGTerm = G4S.GTerm (G4S.Regular '+')
77 | plusGNonTerm = G4S.GNonTerm (G4S.Regular '+')
78 |
79 | regexAnyChar = G4S.Negation (G4S.CharSet [])
80 |
81 | dQual [] = G4S.UpperD []
82 | dQual xs = case last xs of
83 | [] -> G4S.UpperD $ concatWith "." xs
84 | (a:as)
85 | | isUpper a -> G4S.UpperD $ concatWith "." xs
86 | | otherwise -> G4S.LowerD $ concatWith "." xs
87 |
88 | qDir l u = [l,u]
89 |
90 | haskellD = G4S.HaskellD
91 |
92 | -- Force the above declarations (and their types) into scope:
93 | $( return [] )
94 |
95 | [antlr4|
96 | grammar G4;
97 |
98 | decls : decl1 ';' -> list
99 | | decl1 ';' decls -> cons
100 | ;
101 |
102 | decl1 : 'grammar' UpperID -> G4S.Grammar
103 | | LowerID ':' prods -> G4S.Prod
104 | | UpperID ':' lexemeRHS -> lexDecl
105 | | 'fragment' UpperID ':' lexemeRHS -> lexFragment
106 | ;
107 |
108 | prods : prodRHS -> list
109 | | prodRHS '|' prods -> cons
110 | ;
111 |
112 | lexemeRHS : regexes1 '->' directive -> lexemeDirective
113 | | regexes1 -> lexemeNoDir
114 | ;
115 |
116 | prodRHS : alphas '->' directive -> prodDirective
117 | | alphas -> prodNoDir
118 | | '->' directive -> prodNoAlphas
119 | | -> prodNothing
120 | ;
121 |
122 | directive : qDirective -> dQual
123 | | UpperID -> G4S.UpperD
124 | | LowerID -> G4S.LowerD
125 | | '${' HaskellExp '}' -> haskellD
126 | ;
127 |
128 | qDirective : UpperID '.' qDot -> qDir
129 | ;
130 |
131 | qDot : UpperID
132 | | LowerID
133 | ;
134 |
135 | alphas : alpha -> list
136 | | alpha alphas -> cons
137 | | '(' alphas ')'
138 | | '(' alphas ')' '?'
139 | | '(' alphas ')' '*'
140 | | '(' alphas ')' '+'
141 | ;
142 |
143 | alpha : Literal '?' -> maybeGTerm
144 | | LowerID '?' -> maybeGNonTerm
145 | | UpperID '?' -> maybeGNonTerm
146 | | Literal '*' -> starGTerm
147 | | LowerID '*' -> starGNonTerm
148 | | UpperID '*' -> starGNonTerm
149 | | Literal '+' -> plusGTerm
150 | | LowerID '+' -> plusGNonTerm
151 | | UpperID '+' -> plusGNonTerm
152 | | Literal -> gterm
153 | | LowerID -> gnonTerm
154 | | UpperID -> gnonTerm
155 | ;
156 |
157 | // Regex Stuff:
158 |
159 | regexes1 : regexes -> G4S.Concat
160 | ;
161 |
162 | regexes : regex -> list
163 | | regex regexes -> cons
164 | ;
165 |
166 | regex : regex1 '?' -> G4S.Question
167 | | regex1 '*' -> G4S.Kleene
168 | | regex1 '+' -> G4S.PosClos
169 | | '~' regex1 -> G4S.Negation
170 | | regex1 -> id
171 | ;
172 |
173 | regex1 : '[' charSet ']' -> G4S.CharSet
174 | | Literal -> literalRegex
175 | | UpperID -> G4S.Named
176 | | '(' regexes1 ')'
177 | | unionR -> G4S.Union
178 | | '.' -> regexAnyChar
179 | ;
180 |
181 | unionR : regex '|' regex -> list2
182 | | regex '|' unionR -> cons
183 | ;
184 |
185 | charSet : charSet1 -> id
186 | | charSet1 charSet -> append
187 | ;
188 |
189 | charSet1 : SetChar '-' SetChar -> range
190 | | SetChar -> list
191 | | EscapedChar -> list
192 | ;
193 |
194 | UpperID : [A-Z][a-zA-Z0-9_]* -> String;
195 | LowerID : [a-z][a-zA-Z0-9_]* -> String;
196 | Literal : '\'' ( ( '\\\'' ) | (~ ( '\'' ) ) )+ '\'' -> stripQuotesReadEscape;
197 | LineComment : '//' (~ '\n')* '\n' -> String;
198 |
199 | HaskellExp : ( ~ '}' )+ -> String;
200 |
201 | SetChar : ~ ']' -> char ;
202 | WS : [ \t\n\r\f\v]+ -> String;
203 | EscapedChar : '\\' [tnrfv] -> readEscape ;
204 |
205 | |]
206 |
207 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable
4 | , TemplateHaskell #-}
5 | {-|
6 | Module : Language.ANTLR4.Parser
7 | Description : Core G4 quasiquoter (with parsers) for antlr-haskell
8 | Copyright : (c) Karl Cronburg, 2019
9 | License : BSD3
10 | Maintainer : karl@cs.tufts.edu
11 | Stability : experimental
12 | Portability : POSIX
13 |
14 | -}
15 | module Language.ANTLR4.Parser (g4) where
16 |
17 | import Control.Arrow ( (&&&) )
18 | import Data.Char (isUpper)
19 |
20 | import Text.ANTLR.Common
21 | import Text.ANTLR.Grammar
22 | import Text.ANTLR.Parser
23 | import qualified Text.ANTLR.LR as LR
24 | import Text.ANTLR.Lex.Tokenizer as T hiding (tokenize)
25 | import qualified Text.ANTLR.Set as S
26 | import Text.ANTLR.Set (Hashable(..), Generic(..))
27 | import Text.ANTLR.Pretty
28 | import Text.ANTLR.Lex.Regex (regex2dfa)
29 | import Data.Data (Data(..))
30 | import Language.Haskell.TH.Lift (Lift(..))
31 |
32 | import Language.Haskell.TH.Quote (QuasiQuoter(..))
33 | import qualified Language.Haskell.TH as TH
34 | import Language.ANTLR4.Boot.Quote (antlr4, g4_parsers)
35 | import Language.ANTLR4.Syntax
36 | import qualified Language.ANTLR4.Boot.Syntax as G4S
37 | import qualified Language.ANTLR4.Boot.Quote as G4Q
38 | import Language.ANTLR4.G4
39 |
40 | import Debug.Trace as D
41 |
42 | -- Splice the parsers for the grammar we defined in Language.ANTLR4.G4
43 | $(g4_parsers g4AST g4Grammar)
44 |
45 | {- isWhitespace (Just T_LineComment) = True
46 | isWhitespace (Just T_WS) = True
47 | isWhitespace Nothing = True
48 | isWhitespace _ = False -}
49 |
50 | isWhitespace T_LineComment = True
51 | isWhitespace T_WS = True
52 | isWhitespace _ = False
53 |
54 | g4_codeGen :: String -> TH.Q [TH.Dec]
55 | g4_codeGen input = do
56 | loc <- TH.location
57 | let fileName = TH.loc_filename loc
58 | let (line,column) = TH.loc_start loc
59 |
60 | {- case allstarParse (filter (not . isWhitespace . stripEOF . getSymbol) (tokenize input)) of
61 | Left err -> error err
62 | Right ast -> codeGen ast -}
63 | case glrParse isWhitespace input of
64 | (LR.ResultAccept ast) -> codeGen ast
65 | LR.ResultSet s ->
66 | if S.size s == 1
67 | then codeGen $ fromAccept (S.findMin s)
68 | else D.trace (pshow' s) $ codeGen $ fromAccept (S.findMin s)
69 | err -> error $ pshow' err
70 |
71 | fromAccept (LR.ResultAccept ast) = ast
72 | fromAccept err = error $ pshow' err
73 |
74 | codeGen ast = G4Q.g4_decls $ ast2decls ast
75 |
76 | -- | Entrypoint to the G4 quasiquoter. Currently only supports declaration-level
77 | -- Haskell generation of G4 grammars using a GLR parser. The output grammars
78 | -- need not use a GLR parser themselves.
79 | g4 :: QuasiQuoter
80 | g4 = QuasiQuoter
81 | (error "parse exp")
82 | (error "parse pattern")
83 | (error "parse type")
84 | g4_codeGen
85 |
86 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/Regex.hs.boot:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveLift, DeriveGeneric, DeriveAnyClass #-}
2 | {-|
3 | Module : Language.ANTLR4.Regex
4 | Description : Parsec parser for G4 regular expressions
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | -}
11 | module Language.ANTLR4.Regex (parseRegex, regexP) where
12 | import Language.Haskell.TH.Lift (Lift(..))
13 | import Text.ParserCombinators.Parsec
14 | import qualified Text.Parsec.String as PS
15 | import qualified Text.Parsec.Prim as PP
16 | import qualified Text.Parsec.Token as PT
17 | import qualified Text.Parsec.Expr as PE
18 | import qualified Text.Parsec.Combinator as PC
19 | import Data.Char (ord)
20 | import Text.ParserCombinators.Parsec.Language
21 | import qualified Debug.Trace as D -- trace, traceM
22 |
23 | import Text.ANTLR.Set ( Hashable(..), Generic(..) )
24 | import Language.ANTLR4.Boot.Syntax ( Regex(..) )
25 |
26 | --traceM s = D.traceM ("[Regex] " ++ s)
27 | traceM = return
28 |
29 | (<||>) a b = try a <|> try b
30 |
31 | rEOF' = do
32 | (eof >>= return . const True)
33 | <||>
34 | (return False)
35 |
36 | -- | Entrypoint for parsing a G4 regex. This does not get called
37 | -- by the spliced parser, and is here for posterity (and debugging /
38 | -- backwards compatibility).
39 | parseRegex :: String -> Either ParseError (Regex Char)
40 | parseRegex input = PP.parse (regexP rEOF') "" input
41 |
42 | type RegexC = Regex Char
43 |
44 | -- convert list of sequential regexes into a single regex
45 | list2regex [] = Epsilon
46 | list2regex [x] = x
47 | list2regex xs = Concat xs
48 |
49 | -- | G4 regex parser, as used exclusively by the boot parser.
50 | --
51 | -- rEOF is a parser to indicate when it's okay to stop parsing the regex -}
52 | regexP :: PS.Parser Bool -> PS.Parser RegexC
53 | regexP rEOF = let
54 |
55 | regexP' :: PS.Parser [RegexC]
56 | regexP' = do
57 | traceM $ "regexP0"
58 | r <- extendedRegex regexElement
59 | traceM $ "regexP: " ++ show r
60 | whiteSpace
61 | b <- try rEOF
62 | traceM $ "regexP2: " ++ show b
63 | y <- getInput
64 | traceM $ "regexP3: " ++ show y
65 | if b
66 | then return [r]
67 | else do
68 | rs <- regexP'
69 | return $ r:rs
70 | > "regexP"
71 |
72 | in do
73 | xs <- regexP'
74 | return $ list2regex xs
75 |
76 | extendedRegex foo = do
77 | whiteSpace
78 | negation <- optionMaybe (symbol "~")
79 | whiteSpace
80 | r <- foo
81 | whiteSpace
82 | p <- optionMaybe (try $ satisfy (`elem` "+*?"))
83 | whiteSpace
84 | let fncn = (case negation of
85 | (Just _) -> Negation
86 | _ -> id)
87 | return $ fncn (case p of
88 | Nothing -> r
89 | Just '+' -> PosClos r
90 | Just '*' -> Kleene r
91 | Just '?' -> Question r
92 | Just _ -> undefined)
93 |
94 |
95 | regexElement :: PS.Parser RegexC
96 | regexElement = do
97 | whiteSpace
98 | r <- extendedRegex (charSet <||> literal <||> concatR <||> namedR <||> parens unionR)
99 | y <- getInput
100 | traceM $ show y
101 | return r
102 |
103 | {-
104 | return (case rs of
105 | [] -> r
106 | _ -> Concat (r:rs))
107 | -}
108 |
109 | many2 p = do { x <- p; xs <- many p; return (x:xs) }
110 |
111 | -- Named regex (upper-case identifier)
112 | namedR :: PS.Parser RegexC
113 | namedR = do
114 | s <- identifier
115 | return $ Named s
116 |
117 | unionR :: PS.Parser RegexC
118 | unionR = do
119 | traceM ""
120 | u <- sepBy1 regexElement (traceM "OR" >> whiteSpace >> symbol "|" >> traceM "OR2")
121 | traceM ""
122 | return $ Union u
123 |
124 | concatR :: PS.Parser RegexC
125 | concatR = do
126 | traceM ""
127 | c <- many1 (extendedRegex (charSet <||> literal <||> parens regexElement)) >>= return . Concat
128 | traceM ""
129 | return c
130 |
131 | parseEscape :: String -> String
132 | parseEscape s = (read $ "\"" ++ s ++ "\"") :: String
133 |
134 | -- regex string literal uses single quotes
135 | literal :: PS.Parser RegexC
136 | literal = do
137 | r <- PC.between (char '\'') (char '\'') (many singleChar >>= (return . Literal . parseEscape))
138 | y <- getInput
139 | traceM $ "literal: " ++ show y
140 | return r
141 |
142 | charSet :: PS.Parser RegexC
143 | charSet = do
144 | traceM ""
145 | whiteSpace
146 | cset <- PC.between (char '[') (char ']') (charSetBody >>= (return . CharSet . parseEscape))
147 | whiteSpace
148 | traceM $ ": " ++ show cset
149 | return cset
150 |
151 | charSetBody :: PS.Parser [Char]
152 | charSetBody = do
153 | traceM $ ""
154 | xs <- many $ charSetRange <||> (singleChar >>= (\c -> return [c]))
155 | traceM $ "charSetBody: " ++ (show $ concat xs)
156 | return $ concat xs
157 |
158 | charSetRange :: PS.Parser [Char]
159 | charSetRange = do
160 | start <- singleChar
161 | char '-'
162 | end <- singleChar
163 | if ord end <= ord start
164 | then unexpected [end]
165 | else return [start..end]
166 |
167 | singleChar =
168 | escapedChar
169 | <||>
170 | satisfy (\c -> not (c `elem` ['\'', ']']))
171 |
172 | escapedChar :: PS.Parser Char
173 | escapedChar = (do
174 | char '\\'
175 | char '\''
176 | return '\'')
177 | <||> (do
178 | char '\\'
179 | char ']'
180 | return ']')
181 | <||> (do
182 | char '\\'
183 | char '['
184 | return '[')
185 | <||> (do
186 | char '\\'
187 | char '\\'
188 | return '\\')
189 |
190 | regexLexer :: PT.TokenParser ()
191 | regexLexer = PT.makeTokenParser $ haskellStyle
192 | { reservedOpNames = ["[", "]", "//", "-", "+"] }
193 |
194 | whiteSpace = PT.whiteSpace regexLexer
195 | identifier = PT.identifier regexLexer
196 | operator = PT.operator regexLexer
197 | reserved = PT.reserved regexLexer
198 | reservedOp = PT.reservedOp regexLexer
199 | charLiteral = PT.charLiteral regexLexer
200 | stringLiteral = PT.stringLiteral regexLexer
201 | integer = PT.integer regexLexer
202 | natural = PT.natural regexLexer
203 | commaSep1 = PT.commaSep1 regexLexer
204 | parens = PT.parens regexLexer
205 | braces = PT.braces regexLexer
206 | brackets = PT.brackets regexLexer
207 | symbol = PT.symbol regexLexer
208 |
209 |
--------------------------------------------------------------------------------
/src/Language/ANTLR4/Syntax.hs:
--------------------------------------------------------------------------------
1 | {-|
2 | Module : Language.ANTLR4.Syntax
3 | Description : Helper syntax functions used by core G4 parser
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 | -}
10 | module Language.ANTLR4.Syntax where
11 | import Language.ANTLR4.Boot.Syntax
12 | import Data.Char (readLitChar)
13 |
14 | import qualified Debug.Trace as D
15 |
16 | -- | Debugging support
17 | trace s = D.trace ("Language.ANTLR4.Syntax] " ++ s)
18 |
19 | -- | Parse an escape characters allowable in G4:
20 | readEscape :: String -> Char
21 | readEscape s = let
22 | eC ('\\':'n':xs) = '\n'
23 | eC ('\\':'r':xs) = '\r'
24 | eC ('\\':'t':xs) = '\t'
25 | eC ('\\':'b':xs) = '\b'
26 | eC ('\\':'f':xs) = '\f'
27 | eC ('\\':'v':xs) = '\v'
28 | eC ('\\':'"':xs) = '\"'
29 | eC ('\\':'\'':xs) = '\''
30 | eC ('\\':'\\':xs) = '\\'
31 | in eC s
32 |
33 | -- | Parse a literal String by stripping the quotes at the beginning and end of
34 | -- the String, and replacing all escaped characters with the actual escape
35 | -- character code.
36 | stripQuotesReadEscape :: String -> String
37 | stripQuotesReadEscape s = let
38 |
39 | eC [] = error "String ended in a single escape '\\': '" ++ s ++ "'"
40 | eC ('n':xs) = "\n" ++ sQRE xs
41 | eC ('r':xs) = "\r" ++ sQRE xs
42 | eC ('t':xs) = "\t" ++ sQRE xs
43 | eC ('b':xs) = "\b" ++ sQRE xs
44 | eC ('f':xs) = "\f" ++ sQRE xs
45 | eC ('v':xs) = "\v" ++ sQRE xs
46 | eC ('"':xs) = "\"" ++ sQRE xs
47 | eC ('\'':xs) = "\'" ++ sQRE xs
48 | eC ('\\':xs) = "\\" ++ sQRE xs
49 | eC ('u':a:b:c:d:xs) =
50 | case (readLitChar $ '\\' : 'x' : a : b : c : d : []) of
51 | ((hex, "") : []) -> hex : sQRE xs
52 | _ -> error $ "Invalid unicode character '\\u" ++ [a,b,c,d] ++ "' in string '" ++ s ++ "'"
53 | eC (x:xs) = error $ "Invalid escape character '" ++ [x] ++ "' in string '" ++ s ++ "'"
54 |
55 | sQRE [] = []
56 | sQRE ('\\':xs) = eC xs
57 | sQRE (x:xs) = x : sQRE xs
58 |
59 | --in trace s $ (sQRE . init . tail) s
60 | in (sQRE . init . tail) s
61 | --read $ "\"" ++ (init . tail) s ++ "\"" :: String
62 |
63 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Allstar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, FlexibleContexts #-}
2 | {-|
3 | Module : Text.ANTLR.Allstar
4 | Description : Entrypoint for using the ALL(*) parsing algorithm
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 |
11 | This module contains the glue code for hooking Sam's
12 | 'Text.ANTLR.Allstar.ParserGenerator' implementation into the rest of
13 | this package.
14 | -}
15 | module Text.ANTLR.Allstar
16 | ( parse, parse', atnOf
17 | , ALL.GrammarSymbol(..)
18 | , ALL.ATNEnv
19 | ) where
20 |
21 | import qualified Text.ANTLR.Allstar.ParserGenerator as ALL
22 |
23 | import qualified Text.ANTLR.Parser as P
24 | import qualified Text.ANTLR.Grammar as G
25 | import qualified Text.ANTLR.Allstar.ATN as ATN
26 |
27 | import qualified Data.Set as DS
28 | import qualified Text.ANTLR.Set as S
29 |
30 | import Text.ANTLR.Pretty (Prettify(..))
31 |
32 | -- | Go from an Allstar AST to the AST type used internally in this package
33 | fromAllstarAST :: ALL.AST nts t -> P.AST nts t
34 | fromAllstarAST (ALL.Node nt ruleFired asts) = P.AST nt (map fromAllstarSymbol ruleFired) (map fromAllstarAST asts)
35 | fromAllstarAST (ALL.Leaf tok) = P.Leaf tok
36 |
37 | -- TODO: Handle predicate and mutator state during the conversion
38 | -- | Go from an antlr-haskell Grammar to an Allstar ATNEnv. ALL(*) does not
39 | -- currently support predicates and mutators.
40 | atnOf :: (Ord nt, Ord t, S.Hashable nt, S.Hashable t) => G.Grammar s nt t dt -> ALL.ATNEnv nt t
41 | atnOf g = DS.fromList (map convTrans (S.toList (ATN._Δ (ATN.atnOf g))))
42 |
43 | -- | ATN Transition to AllStar version
44 | convTrans (st0, e, st1) = (convState st0, convEdge e, convState st1)
45 |
46 | -- | ATN State to AllStar version
47 | convState (ATN.Start nt) = ALL.Init nt
48 | convState (ATN.Middle nt i0 i1) = ALL.Middle nt i0 i1
49 | convState (ATN.Accept nt) = ALL.Final nt
50 |
51 | -- | ATN Edge to AllStar version
52 | convEdge (ATN.NTE nt) = ALL.GS (ALL.NT nt)
53 | convEdge (ATN.TE t) = ALL.GS (ALL.T t)
54 | convEdge (ATN.PE p) = ALL.PRED True -- TODO
55 | convEdge (ATN.ME m) = ALL.PRED True -- TODO
56 | convEdge ATN.Epsilon = ALL.GS ALL.EPS
57 |
58 | -- | Entrypoint to the ALL(*) parsing algorithm.
59 | parse'
60 | :: ( P.CanParse nts tok, Prettify chr )
61 | => ALL.Tokenizer chr tok
62 | -> [chr]
63 | -> ALL.GrammarSymbol nts (ALL.Label tok)
64 | -> ALL.ATNEnv nts (ALL.Label tok)
65 | -> Bool
66 | -> Either String (P.AST nts tok)
67 | parse' tokenizer inp s0 atns cache = fromAllstarAST <$> ALL.parse tokenizer inp s0 atns cache
68 |
69 | -- | No tokenizer required (chr == tok):
70 | parse
71 | :: ( P.CanParse nts tok )
72 | => [tok]
73 | -> ALL.GrammarSymbol nts (ALL.Label tok)
74 | -> ALL.ATNEnv nts (ALL.Label tok)
75 | -> Bool
76 | -> Either String (P.AST nts tok)
77 | parse = let
78 | tokenizer [] = []
79 | tokenizer (t:ts) = [(t,ts)]
80 | in parse' tokenizer
81 |
82 | convSymbol s = ALL.NT s
83 |
84 | toAllstarSymbol :: G.ProdElem nts ts -> ALL.GrammarSymbol nts ts
85 | toAllstarSymbol (G.NT nts) = ALL.NT nts
86 | toAllstarSymbol (G.T ts) = ALL.T ts
87 | toAllstarSymbol (G.Eps) = ALL.EPS
88 |
89 | fromAllstarSymbol :: ALL.GrammarSymbol nts ts -> G.ProdElem nts ts
90 | fromAllstarSymbol (ALL.NT nts) = (G.NT nts)
91 | fromAllstarSymbol (ALL.T ts) = (G.T ts)
92 | fromAllstarSymbol ALL.EPS = G.Eps
93 |
94 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Allstar/ATN.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ScopedTypeVariables, DeriveAnyClass, DeriveGeneric
2 | , FlexibleContexts, UndecidableInstances, StandaloneDeriving
3 | , OverloadedStrings #-}
4 | {-|
5 | Module : Text.ANTLR.Allstar.ATN
6 | Description : Augmented recursive transition network algorithms
7 | Copyright : (c) Karl Cronburg, 2018
8 | License : BSD3
9 | Maintainer : karl@cs.tufts.edu
10 | Stability : experimental
11 | Portability : POSIX
12 |
13 | -}
14 | module Text.ANTLR.Allstar.ATN where
15 | -- Augmented recursive Transition Network
16 | import Text.ANTLR.Grammar
17 | --import Text.ANTLR.Allstar.GSS hiding (Edge, Node)
18 | import Text.ANTLR.Allstar.Stacks
19 | import Text.ANTLR.Set (Set(..), empty, fromList, toList, Hashable, Generic)
20 | import Text.ANTLR.Pretty
21 |
22 | -- | Graph-structured stack over ATN states.
23 | type Gamma nt = Stacks (ATNState nt)
24 |
25 | -- | An ATN defining some language we wish to parse
26 | data ATN s nt t = ATN
27 | { _Δ :: Set (Transition s nt t) -- ^ The transition function
28 | } deriving (Eq, Ord, Show)
29 |
30 | instance (Prettify s, Prettify nt, Prettify t, Hashable nt, Hashable t, Eq nt, Eq t) => Prettify (ATN s nt t) where
31 | prettify atn = do
32 | pLine "_Δ:"
33 | incrIndent 4
34 | prettify $ _Δ atn
35 | incrIndent (-4)
36 |
37 | -- | Tuple corresponding to a distinct transition in the ATN:
38 | type Transition s nt t = (ATNState nt, Edge s nt t, ATNState nt)
39 |
40 | -- | The possible subscripts from Figure 8 of the ALL(*) paper
41 | data ATNState nt = Start nt
42 | | Middle nt Int Int
43 | | Accept nt
44 | deriving (Eq, Generic, Hashable, Ord, Show)
45 |
46 | -- | LaTeX style ATN states. TODO: check length of NT printed and put curly braces
47 | -- around it if more than one character.
48 | instance (Prettify nt) => Prettify (ATNState nt) where
49 | prettify (Start nt) = pStr "p_" >> prettify nt
50 | prettify (Accept nt) = pStr "p'_" >> prettify nt
51 | prettify (Middle nt i j) = do
52 | pStr "p_{"
53 | prettify i
54 | pStr ","
55 | prettify j
56 | pStr "}"
57 |
58 | -- | An edge in an ATN.
59 | data Edge s nt t =
60 | NTE nt -- ^ Nonterminal edge
61 | | TE t -- ^ Terminal edge
62 | | PE (Predicate ()) -- ^ Predicated edge with no state
63 | | ME (Mutator ()) -- ^ Mutator edge with no state
64 | | Epsilon -- ^ Nondeterministic edge parsing nothing
65 | deriving (Eq, Generic, Hashable, Ord, Show)
66 |
67 | instance (Prettify s, Prettify nt, Prettify t) => Prettify (Edge s nt t) where
68 | prettify x = do
69 | pStr "--"
70 | case x of
71 | NTE nt -> prettify nt
72 | TE t -> prettify t
73 | PE p -> prettify p
74 | ME m -> prettify m
75 | Epsilon -> pStr "ε"
76 | pStr "-->"
77 |
78 | -- | Convert a G4 grammar into an ATN for parsing with ALL(*)
79 | atnOf
80 | :: forall nt t s dt. (Eq nt, Eq t, Hashable nt, Hashable t)
81 | => Grammar s nt t dt -> ATN s nt t
82 | atnOf g = let
83 |
84 | _Δ :: Int -> Production s nt t dt -> [Transition s nt t]
85 | _Δ i (Production lhs rhs _) = let
86 | --(Prod _α)) = let
87 |
88 | -- Construct an internal production state from the given ATN identifier
89 | st :: nt -> Int -> Int -> ATNState nt
90 | st = Middle
91 |
92 | -- Create the transition for the k^th production element in the i^th
93 | -- production:
94 | _Δ' :: Int -> ProdElem nt t -> Transition s nt t
95 | _Δ' k (NT nt) = (st lhs i (k - 1), NTE nt, st lhs i k)
96 | _Δ' k (T t) = (st lhs i (k - 1), TE t, st lhs i k)
97 |
98 | -- The epsilon (or mu) transition for the accepting / final state:
99 | sϵ = (Start lhs, Epsilon, Middle lhs i 0)
100 | fϵ _α = (Middle lhs i (length _α), Epsilon, Accept lhs)
101 |
102 | sem_state _α = Middle lhs i (length _α + 1)
103 | sϵ_sem _π _α = [(Start lhs, Epsilon, sem_state _α), (sem_state _α, PE _π, Middle lhs i 0)]
104 | fϵ_sem = fϵ
105 |
106 | sϵ_mut = sϵ
107 | fϵ_mut _μ = (Middle lhs i 0, ME _μ, Accept lhs)
108 |
109 | in (case rhs of
110 | (Prod Pass _α) -> [sϵ, fϵ _α] ++ zipWith _Δ' [1..(length _α)] _α
111 | (Prod (Sem _π) _α) -> sϵ_sem _π _α ++ [fϵ_sem _α] ++ zipWith _Δ' [1..(length _α)] _α
112 | (Prod (Action _μ) _) -> [sϵ_mut, fϵ_mut _μ]
113 | )
114 |
115 | in ATN
116 | { _Δ = fromList $ concat $ zipWith _Δ [0..length (ps g)] $ ps g
117 | }
118 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Allstar/Stacks.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ScopedTypeVariables, DeriveAnyClass, DeriveGeneric,
2 | OverloadedStrings #-}
3 | {-|
4 | Module : Text.ANTLR.Allstar.Stacks
5 | Description : Graph-structured stack (GSS) for the ALL(*) algorithm
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | -}
13 | module Text.ANTLR.Allstar.Stacks
14 | ( Stacks(..)
15 | , (#)
16 | , merge
17 | , push
18 | , pop
19 | ) where
20 | import qualified Prelude as P
21 | import Prelude hiding (map, foldr, filter)
22 | import Text.ANTLR.Set
23 | ( union, Set(..), foldr, map, filter
24 | , fromList, singleton, Hashable(..), Generic(..)
25 | )
26 | import qualified Text.ANTLR.Set as Set
27 | import Data.List (nub)
28 | import Text.ANTLR.Pretty
29 |
30 | -- | Graph-structured stack representation
31 | data Stacks a =
32 | Empty
33 | | Wildcard
34 | | Stacks (Set [a])
35 | deriving (Eq, Ord, Generic, Hashable, Show)
36 |
37 | instance (Prettify a, Hashable a, Eq a) => Prettify (Stacks a) where
38 | prettify Empty = pStr "[]"
39 | prettify Wildcard = pStr "#"
40 | prettify (Stacks s) = prettify s
41 |
42 | -- | Represents the set of __all__ stacks
43 | (#) = Wildcard
44 |
45 | -- | Combine two GSSs
46 | merge :: (Eq a, Hashable a) => Stacks a -> Stacks a -> Stacks a
47 | merge Wildcard _ = Wildcard
48 | merge _ Wildcard = Wildcard
49 | merge Empty Empty = Empty
50 | merge (Stacks _Γ) Empty = Stacks $ _Γ `union` fromList [[]]
51 | merge Empty (Stacks _Γ) = Stacks $ _Γ `union` fromList [[]]
52 | merge (Stacks _Γ) (Stacks _Γ') = Stacks $ _Γ `union` _Γ'
53 |
54 | -- | Push a state onto all the leaves of the given GSS
55 | push :: (Eq a, Hashable a) => a -> Stacks a -> Stacks a
56 | push a Empty = Stacks $ singleton [a]
57 | push a Wildcard = Wildcard
58 | push a (Stacks _Γ) = Stacks $ map ((:) a) _Γ
59 |
60 | -- | Get heads of non-empty stacks / lists:
61 | heads :: (Eq a, Hashable a) => Set [a] -> [a]
62 | heads = let
63 | heads' :: [a] -> [a] -> [a]
64 | heads' [] bs = bs
65 | heads' (a:as) bs = a:bs
66 | in foldr heads' []
67 |
68 | -- | Pop off all the current states from the given GSS
69 | pop :: (Eq a, Hashable a) => Stacks a -> [(a, Stacks a)]
70 | pop Empty = []
71 | pop Wildcard = []
72 | pop (Stacks _Γ) = let
73 | -- ss :: a -> Stacks a
74 | ss a = Stacks $ map tail $ filter (\as -> (not . null) as && ((== a) . head) as) _Γ
75 | in P.map (\a -> (a, ss a)) (nub . heads $ _Γ)
76 |
77 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Common.hs:
--------------------------------------------------------------------------------
1 | {-|
2 | Module : Text.ANTLR.Common
3 | Description : Haskell-level helper functions used throughout Text.ANTLR
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 |
10 | -}
11 | module Text.ANTLR.Common where
12 |
13 | concatWith cs [] = []
14 | concatWith cs [x] = x
15 | concatWith cs (x:xs) = x ++ cs ++ concatWith cs xs
16 |
17 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Language.hs:
--------------------------------------------------------------------------------
1 | {-|
2 | Module : Text.ANTLR.Language
3 | Description : Viewing a language as a set of words accepted
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 |
10 | -}
11 | module Text.ANTLR.Language
12 | ( Alphabet(..), ascii, isASCII
13 | ) where
14 | import Prelude hiding (Word)
15 | import Data.Set.Monad (Set(..))
16 | import qualified Data.Set.Monad as Set
17 |
18 | import Data.Char
19 |
20 | type Alphabet a = Set a
21 |
22 | ascii :: Alphabet Char
23 | ascii = Set.fromList $ map chr [0 .. 127]
24 |
25 | isASCII :: Char -> Bool
26 | isASCII c = ord c < 127
27 |
28 | type Word a = [a]
29 |
30 | type Language a = Set (Word a)
31 |
32 | union :: (Ord a) => Set a -> Set a -> Set a
33 | union = Set.union
34 |
35 | concat :: (Ord a) => Language a -> Language a -> Language a
36 | concat a b = Set.fromList
37 | [ s ++ t
38 | | s <- Set.toList a
39 | , t <- Set.toList b
40 | ]
41 |
42 | kleene = undefined
43 |
44 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Lex.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FlexibleInstances, InstanceSigs, DeriveDataTypeable
2 | , ScopedTypeVariables #-}
3 | {-|
4 | Module : Text.ANTLR.Lex
5 | Description : Entrypoint for lexical and tokenization algorithms
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | -}
13 | module Text.ANTLR.Lex
14 | ( tokenize
15 | , Token(..)
16 | , tokenName, tokenValue
17 | ) where
18 |
19 | import Text.ANTLR.Lex.Tokenizer
20 |
21 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Lex/Automata.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ScopedTypeVariables, MonadComprehensions #-}
2 | {-|
3 | Module : Text.ANTLR.Automata
4 | Description : Automatons and algorithms as used during tokenization
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 |
11 | -}
12 | module Text.ANTLR.Lex.Automata where
13 | import Text.ANTLR.Set (Set(..), member, toList, union, notMember, Hashable(..), fromList)
14 | import qualified Text.ANTLR.Set as Set
15 |
16 | -- | An automaton with edges @e@, symbols @s@, and state indices @i@
17 | data Automata e s i = Automata
18 | { _S :: Set i -- ^ Finite set of states.
19 | , _Σ :: Set s -- ^ Input (edge) alphabet
20 | , _Δ :: Set (Transition e i) -- ^ Transition function
21 | , s0 :: i -- ^ Start state
22 | , _F :: Set i -- ^ Accepting states
23 | } deriving (Eq)
24 |
25 | instance (Eq e, Eq s, Eq i, Hashable e, Hashable s, Hashable i, Show e, Show s, Show i) => Show (Automata e s i) where
26 | show (Automata s sigma delta s0 f) =
27 | show s
28 | ++ "\n Σ: " ++ show sigma
29 | ++ "\n Δ: " ++ show delta
30 | ++ "\n s0: " ++ show s0
31 | ++ "\n F: " ++ show f
32 | ++ "\n"
33 |
34 | -- | Edge label of an automaton, on which we traverse if we match
35 | -- on one of the tokens @t@ in the set. The boolean is for negation
36 | -- of the set.
37 | type AutomataEdge t = (Bool, Set t)
38 |
39 | -- | A triplet with an edge alphabet of @e@ and node states of @i@.
40 | type Transition e i = (i, AutomataEdge e, i)
41 |
42 | -- | The from-node component of a 'Transition'
43 | tFrom :: Transition e i -> i
44 | tFrom (a,b,c) = a
45 |
46 | -- | The to-node component of a 'Transition'
47 | tTo :: Transition e i -> i
48 | tTo (a,b,c) = c
49 |
50 | -- | The set of edge characters in @e@ of a 'Transition'
51 | tEdge :: Transition e i -> Set e
52 | tEdge (a,(comp, b),c) = b
53 |
54 | -- | Determine the edge-label alphabet of a set of transitions.
55 | transitionAlphabet __Δ =
56 | [ e
57 | | (_, (c, es), _) <- toList __Δ
58 | , e <- es
59 | ]
60 |
61 | -- | Compress a set of transitions such that every pair of (start,end) states
62 | -- appears at most once in the set.
63 | compress ::
64 | (Eq i, Eq e, Hashable i, Hashable e)
65 | => Set (Transition e i) -> Set (Transition e i)
66 | compress __Δ = fromList
67 | [ ( a, (c, fromList [ e
68 | | (a', (c', es'), b') <- toList __Δ
69 | , a' == a && b' == b && c' == c
70 | , e <- toList es'
71 | ])
72 | , b)
73 | | (a, (c, es), b) <- toList __Δ
74 | ]
75 |
76 | -- | XOR helper function over booleans.
77 | xor a b = (not a && b) || (not b && a)
78 |
79 | -- | Is the given transition triplet (with a single @e@ character as the edge
80 | -- edge label) in some set of transitions? Note that we need to handle complement
81 | -- sets here, in case the given @e@ is in the complement of one of the
82 | -- transitions in the set.
83 | transitionMember ::
84 | (Eq i, Hashable e, Eq e)
85 | => (i, e, i) -> Set (Transition e i) -> Bool
86 | transitionMember (a, e, b) _Δ =
87 | or
88 | [ xor complement (e `member` es)
89 | | (a', (complement, es), b') <- toList _Δ
90 | , a' == a
91 | , b' == b
92 | ]
93 |
94 | -- | Is the given character @s@ accepted by the given edge label?
95 | edgeMember s (complement, es) = xor complement (s `member` es)
96 |
97 | -- | An automaton must either 'Accept' or 'Reject'.
98 | data Result = Accept | Reject
99 |
100 | -- | Is the start state valid?
101 | validStartState nfa = s0 nfa `member` _S nfa
102 |
103 | -- | Are all of the ending states valid?
104 | validFinalStates nfa = and [s `member` _S nfa | s <- toList $ _F nfa]
105 |
106 | -- | Can all of the nodes as defined by the set of transitions be found
107 | -- in the set of allowable states '_S'?
108 | validTransitions ::
109 | forall e s i. (Hashable e, Hashable i, Eq e, Eq i)
110 | => Automata e s i -> Bool
111 | validTransitions nfa = let
112 | vT :: [Transition e i] -> Bool
113 | vT [] = True
114 | vT ((s1, es, s2):rest) =
115 | s1 `member` _S nfa
116 | && s2 `member` _S nfa
117 | && vT rest
118 | in vT $ (toList . _Δ) nfa
119 |
120 | -- | An automaton configuration is the set of state (indices) your
121 | -- can currently be in.
122 | type Config i = Set i
123 |
124 | -- | Generic closure function so that *someone* never asks "what's a closure?" ever
125 | -- again. For an epsilon-closure the given @fncn@ needs to return 'True' when
126 | -- given an @e@ that is an epsilon, and 'False' in all other cases.
127 | closureWith
128 | :: forall e s i. (Hashable e, Hashable i, Eq e, Eq i)
129 | => (e -> Bool) -> Automata e s i -> Config i -> Config i
130 | closureWith fncn Automata{_S = _S, _Δ = _Δ'} states = let
131 |
132 | -- Check which edges are "epsilons" (or something else).
133 | _Δ = Set.map (\(a,(comp, b),c) -> (a, (comp, Set.map fncn b), c)) _Δ'
134 |
135 | cl :: Config i -> Config i -> Config i
136 | cl busy ss
137 | | Set.null ss = Set.empty
138 | | otherwise = let
139 | ret = fromList
140 | [ s' | s <- toList ss
141 | , s' <- toList _S
142 | , s' `notMember` busy
143 | , (s, True, s') `transitionMember` _Δ ]
144 | in ret `union` cl (ret `union` busy) ret
145 | in states `union` cl Set.empty states
146 | --in Set.foldr (\a b -> union (cl a) b) Set.empty states
147 |
148 | -- | Consume the @e@ character given, based on the fact that we are currently
149 | -- in some 'Config i' of states, resulting in a new config consisting of the
150 | -- states that we can get to by doing so.
151 | move
152 | :: forall e s i. (Hashable e, Hashable i, Eq i, Eq e)
153 | => Automata e s i -> Config i -> e -> Config i
154 | move Automata{_S = _S, _Δ = _Δ} _T a = fromList
155 | [ s' | s <- toList _T
156 | , s' <- toList _S
157 | , (s, a, s') `transitionMember` _Δ ]
158 |
159 | -- | Whether or not (a, (True, _), b) is a transition in our set of transitions.
160 | complementMember
161 | :: (Hashable i, Eq i, Hashable e, Eq e)
162 | => (i, i) -> Set (Transition e i) -> Bool
163 | complementMember (a, b) =
164 | not . null . Set.filter (\(a', (c, _), b') -> a' == a && b' == b && c)
165 |
166 | -- | Set of states you can move to if you see a character not in the alphabet.
167 | moveComplement
168 | :: forall e s i. (Hashable e, Hashable i, Eq i, Eq e)
169 | => Automata e s i -> Config i -> Config i
170 | moveComplement Automata{_S = _S, _Δ = _Δ} _T = fromList
171 | [ s' | s <- toList _T
172 | , s' <- toList _S
173 | , (s, s') `complementMember` _Δ ]
174 |
175 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Lex/DFA.hs:
--------------------------------------------------------------------------------
1 | {-|
2 | Module : Text.ANTLR.Lex.DFA
3 | Description : Deterministic finite automaton types
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 |
10 | -}
11 | module Text.ANTLR.Lex.DFA where
12 | import Text.ANTLR.Lex.Automata
13 |
14 | -- | DFA edges are just the symbols of our alphabet.
15 | type Edge s = s
16 |
17 | -- | DFA states are just some Eq-able value, likely integers @i@
18 | type State i = i
19 |
20 | -- | A DFA is an automata with edges labeled by symbols @s@ and nodes representing
21 | -- states labeled by some type @i@.
22 | type DFA s i = Automata (Edge s) s (State i)
23 |
24 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Lex/NFA.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ScopedTypeVariables, MonadComprehensions, DeriveAnyClass,
2 | DeriveGeneric #-}
3 | {-|
4 | Module : Text.ANTLR.Lex.NFA
5 | Description : Nondeterministic finite automatons and algorithms to compute DFAs
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | -}
13 | module Text.ANTLR.Lex.NFA where
14 | import Text.ANTLR.Lex.Automata
15 | import Text.ANTLR.Lex.DFA (DFA(..))
16 | import qualified Text.ANTLR.Lex.DFA as DFA
17 |
18 | import Text.ANTLR.Set (singleton, notMember, union, Set(..), member, Hashable)
19 | import qualified Text.ANTLR.Set as Set
20 | import Text.ANTLR.Set (fromList, toList)
21 |
22 | import Data.List (maximumBy)
23 | import GHC.Generics (Generic)
24 |
25 | -- | NFA edges can be labeled with either a symbol in symbol alphabet @s@,
26 | -- or an epsilon.
27 | data Edge s = Edge s | NFAEpsilon
28 | deriving (Ord, Eq, Hashable, Generic)
29 |
30 | instance (Show s) => Show (Edge s) where
31 | show NFAEpsilon = "ϵ"
32 | show (Edge s) = "E(" ++ show s ++ ")"
33 |
34 | -- | Is this an edge (not an epsilon)?
35 | isEdge :: Edge s -> Bool
36 | isEdge (Edge _) = True
37 | isEdge _ = False
38 |
39 | -- | An NFA is an automata with edges @'Edge' s@ and nodes @i@.
40 | type NFA s i = Automata (Edge s) s i
41 |
42 | -- | NFA states
43 | type State i = i
44 |
45 | -- | DFA states as constructed from an NFA is a set (config) of NFA states.
46 | type DFAState i = Config (State i)
47 |
48 | -- | Epsilon closure of an NFA is a closure where we can traverse epsilons.
49 | epsClosure ::
50 | (Ord i, Hashable i, Hashable s, Eq s)
51 | => Automata (Edge s) s i -> Config i -> Config i
52 | epsClosure = closureWith (NFAEpsilon ==)
53 |
54 | -- | Subset construction algorithm for constructing a DFA from an NFA.
55 | nfa2dfa_slow :: forall s i. (Hashable s, Eq s, Hashable i, Eq i, Ord i)
56 | => NFA s i -> DFA s (Set (State i))
57 | nfa2dfa_slow nfa@Automata{s0 = s0, _Σ = _Σ, _F = _F0} = let
58 |
59 | epsCl = epsClosure nfa
60 | mv = move nfa
61 |
62 | dS :: Config (DFAState i) -> Config (DFAState i) -> Set (Transition (DFA.Edge s) (DFAState i))
63 | dS marked ts
64 | | Set.null ts = Set.empty
65 | | otherwise = let
66 |
67 | _Δ = fromList
68 | [ (_T, (False, singleton a), epsCl (mv _T (Edge a)))
69 | | _T <- toList ts
70 | , _T `notMember` marked
71 | , a <- toList _Σ
72 | ]
73 |
74 | _Us = Set.map (\(a,b,c) -> c) _Δ
75 | fromStates = Set.map (\(a,b,c) -> a) _Δ
76 |
77 | in _Δ `union` dS (fromStates `union` marked) _Us
78 |
79 | _Δ' :: Set (Transition (DFA.Edge s) (DFAState i))
80 | _Δ' = dS Set.empty (singleton s0')
81 |
82 | s0' = epsCl $ singleton s0
83 |
84 | in Automata
85 | { _S = fromList [ tFrom x | x <- toList _Δ' ] `union` fromList [ tTo x | x <- toList _Δ' ]
86 | , _Σ = _Σ
87 | , _Δ = _Δ'
88 | , s0 = s0'
89 | , _F = fromList [nfaState | (_,_,nfaState) <- toList _Δ', c <- toList nfaState, c `member` _F0]
90 | }
91 |
92 | -- | Subset construction but where we compress our sets of transitions along the way.
93 | nfa2dfa :: forall s i. (Hashable s, Eq s, Hashable i, Eq i, Ord i)
94 | => NFA s i -> DFA s (Set (State i))
95 | nfa2dfa nfa@Automata{s0 = s0, _Σ = _Σ, _S = _S, _F = _F0} = let
96 |
97 | epsCl = epsClosure nfa
98 | mv = move nfa
99 |
100 | dS :: Config (DFAState i) -> Config (DFAState i) -> Set (Transition (DFA.Edge s) (DFAState i))
101 | dS marked ts
102 | | Set.null ts = Set.empty
103 | | otherwise = let
104 |
105 | _Δ =
106 | Set.fromList
107 | [ (_T, (False, singleton a), epsCl (mv _T (Edge a)))
108 | | _T <- Set.toList ts
109 | , _T `notMember` marked
110 | , a <- Set.toList _Σ
111 | ]
112 | `union`
113 | Set.fromList
114 | [ (_T, (True, _Σ), epsCl $ moveComplement nfa _T)
115 | | _T <- Set.toList ts
116 | , _T `notMember` marked
117 | ]
118 |
119 | _Us = fromList [ c | (a,b,c) <- toList _Δ ]
120 | fromStates = fromList [ a | (a,b,c) <- toList _Δ ]
121 |
122 | in _Δ `union` dS (fromStates `union` marked) _Us
123 |
124 | _Δ' :: Set (Transition (DFA.Edge s) (DFAState i))
125 | _Δ' = let run_dS = dS Set.empty (singleton s0')
126 | in Set.filter (\(_, _, b) -> not $ Set.null b) $ compress run_dS
127 |
128 | s0' = epsCl $ singleton s0
129 |
130 | in Automata
131 | { _S = fromList [ tFrom x | x <- toList _Δ' ] `union` fromList [ tTo x | x <- toList _Δ' ]
132 | , _Σ = _Σ
133 | , _Δ = _Δ'
134 | , s0 = s0'
135 | , _F = fromList [nfaState | (_,_,nfaState) <- toList _Δ', c <- toList nfaState, c `member` _F0]
136 | }
137 |
138 | -- | Compute all the states statically used in a particular set of transitions.
139 | allStates :: forall s i. (Hashable i, Eq i) => Set (Transition (Edge s) i) -> Set (State i)
140 | allStates ts = fromList [ n | (n, _, _) <- toList ts ] `union` fromList [ n | (_, _, n) <- toList ts ]
141 |
142 | -- | Converts the given list of transitions into a complete NFA / Automata
143 | -- structure, assuming two things:
144 | --
145 | -- > The first node of the first edge is the start state
146 | -- > The last node of the last edge is the (only) final state
147 | --
148 | list2nfa :: forall s i. (Hashable i, Eq i, Hashable s, Eq s) => [Transition (Edge s) i] -> NFA s i
149 | list2nfa [] = undefined
150 | list2nfa ((t@(n1,_,_)):ts) = Automata
151 | { _S = allStates $ Set.fromList (t:ts)
152 | , _Σ = Set.fromList [ e
153 | | (_, es, _) <- t:ts
154 | , Edge e <- filter isEdge (Set.toList $ snd es)
155 | ]
156 | , s0 = n1
157 | , _F = Set.fromList [ (\(_,_,c) -> c) $ last (t:ts) ]
158 | , _Δ = Set.fromList $ t:ts
159 | }
160 |
161 | -- | Rename the states in the second NFA such that they start at the index
162 | -- one greater than the maximum index of the first NFA.
163 | shiftAllStates ::
164 | forall s i. (Hashable i, Eq i, Ord i, Hashable s, Eq s)
165 | => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i
166 | shiftAllStates from to
167 | n1 (n2@Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0})
168 | = n2 { _Δ = fromList [ (to $ from i0 + shift, e, to $ from i1 + shift) | (i0, e, i1) <- toList _Δ2 ]
169 | , _S = fromList [ to $ from i + shift | i <- toList _S2 ]
170 | , _F = fromList [ to $ from i + shift | i <- toList _F2 ]
171 | , s0 = to $ from s2_0 + shift
172 | }
173 | where
174 | shift = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 (_Δ n1)
175 |
176 | -- | Take the union of two NFAs, renaming states according to 'shiftAllStates'.
177 | nfaUnion ::
178 | forall s i. (Ord i, Hashable i, Eq i, Hashable s, Eq s)
179 | => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i
180 | nfaUnion from to
181 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) n2
182 | = let
183 |
184 | Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0} = shiftAllStates from to n1 n2
185 | mx2 = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 _Δ2
186 |
187 | _Δ' = _Δ1
188 | `union` _Δ2
189 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s1_0)
190 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s2_0)
191 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), f0') | f1_0 <- toList _F1 ]
192 | `union` fromList [ (f2_0, (False, singleton NFAEpsilon), f0') | f2_0 <- toList _F2 ]
193 |
194 | s0' = to mx2
195 | f0' = to $ mx2 + 1
196 |
197 | in Automata
198 | { _S = allStates _Δ'
199 | , _Σ = fromList [ e
200 | | (_, es, _) <- toList _Δ'
201 | , Edge e <- toList $ Set.filter isEdge $ snd es
202 | ]
203 | , s0 = s0'
204 | , _F = Set.fromList [f0']
205 | , _Δ = _Δ'
206 | }
207 |
208 | -- | Concatenate two NFAs, renaming states in the second NFA according to 'shiftAllStates'.
209 | nfaConcat ::
210 | forall s i. (Hashable i, Eq i, Ord i, Hashable s, Eq s) => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i
211 | nfaConcat from to
212 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) n2
213 | = let
214 | Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0} = shiftAllStates from to n1 n2
215 |
216 | _Δ' = _Δ1
217 | `union` _Δ2
218 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), s2_0) | f1_0 <- toList _F1 ]
219 |
220 | in Automata
221 | { _S = allStates _Δ'
222 | , _Σ = fromList [ e
223 | | (_, es, _) <- toList _Δ'
224 | , Edge e <- toList $ Set.filter isEdge $ snd es
225 | ]
226 | , s0 = s1_0
227 | , _F = _F2
228 | , _Δ = _Δ'
229 | }
230 |
231 | -- | Take the Kleene-star of an NFA, adding epsilons as needed.
232 | nfaKleene :: forall s i. (Ord i, Hashable i, Eq i, Hashable s, Eq s) => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i
233 | nfaKleene from to
234 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0})
235 | = let
236 | mx1 = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 _Δ1
237 |
238 | s0' = to mx1
239 | f0' = to $ mx1 + 1
240 |
241 | _Δ' = _Δ1
242 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s1_0)
243 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), f0')
244 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), s1_0) | f1_0 <- toList _F1 ]
245 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), f0') | f1_0 <- toList _F1 ]
246 |
247 | in Automata
248 | { _S = allStates _Δ'
249 | , _Σ = fromList [ e
250 | | (_, es, _) <- toList _Δ'
251 | , Edge e <- toList $ Set.filter isEdge $ snd es
252 | ]
253 | , s0 = s0'
254 | , _F = Set.fromList [f0']
255 | , _Δ = _Δ'
256 | }
257 |
258 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Lex/Regex.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ScopedTypeVariables, DeriveLift #-}
2 | {-|
3 | Module : Text.ANTLR.Lex.Regex
4 | Description : Regular expressions as used during tokenization
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 |
11 | -}
12 | module Text.ANTLR.Lex.Regex where
13 |
14 | import Text.ANTLR.Set (Hashable, singleton, fromList)
15 | import Text.ANTLR.Lex.NFA
16 | import qualified Text.ANTLR.Lex.DFA as DFA
17 | import Language.Haskell.TH.Syntax (Lift(..))
18 |
19 | -- | Regular expression data representation as used by the tokenizer.
20 | data Regex s =
21 | Epsilon -- ^ Regex accepting the empty string
22 | | Symbol s -- ^ An individual symbol in the alphabet
23 | | Literal [s] -- ^ A literal sequence of symbols (concatenated together)
24 | | Class [s] -- ^ A set of alternative symbols (unioned together)
25 | | Union (Regex s) (Regex s) -- ^ Union of two arbitrary regular expressions
26 | | Concat [Regex s] -- ^ Concatenation of 2 or more regular expressions
27 | | Kleene (Regex s) -- ^ Kleene closure of a regex
28 | | PosClos (Regex s) -- ^ Positive closure
29 | | Question (Regex s) -- ^ 0 or 1 instances
30 | | MultiUnion [Regex s] -- ^ Union of two or more arbitrary regexs
31 | | NotClass [s] -- ^ Complement of a character class
32 | deriving (Lift)
33 |
34 | instance (Show s) => Show (Regex s) where
35 | show Epsilon = "ϵ"
36 | show (Symbol s) = show s
37 | show (Literal s) = show s
38 | show (Class s) = "[" ++ show s ++ "]"
39 | show (Union r1 r2) = "(" ++ show r1 ++ "|" ++ show r2 ++ ")"
40 | show (Concat rs) = concatMap show rs
41 | show (Kleene r) = "(" ++ show r ++ ")*"
42 | show (PosClos r) = "(" ++ show r ++ ")+"
43 | show (Question r) = "(" ++ show r ++ ")?"
44 | show (MultiUnion rs) = tail $ concatMap (\r -> "|" ++ show r) rs
45 | show (NotClass rs) = "[^" ++ tail (concatMap show rs) ++ "]"
46 |
47 | -- | Translation code of a regular expresion to an NFA.
48 | regex2nfa' ::
49 | forall s i. (Hashable i, Ord i, Hashable s, Eq s)
50 | => (i -> Int) -> (Int -> i) -> Regex s -> NFA s i
51 | regex2nfa' from to r = let
52 | r2n :: Regex s -> NFA s i
53 | r2n Epsilon = list2nfa [ (to 0, (False, singleton NFAEpsilon), to 1) ]
54 | r2n (Symbol s) = list2nfa [ (to 0, (False, singleton $ Edge s), to 1) ]
55 | r2n (Union r1 r2) = nfaUnion from to (r2n r1) (r2n r2)
56 | r2n (Concat []) = r2n Epsilon -- TODO: empty concat
57 | r2n (Concat (r:rs)) = foldl (nfaConcat from to) (r2n r) (map r2n rs)
58 | r2n (Kleene r1) = nfaKleene from to (r2n r1)
59 | r2n (PosClos r1) = r2n $ Concat [r1, Kleene r1]
60 | r2n (Question r1) = nfaUnion from to (r2n r1) (r2n Epsilon)
61 | r2n (Class []) = r2n Epsilon -- TODO: empty character class shouldn't accept empty string?
62 | r2n (Class (s:ss)) = list2nfa [ (to 0, (False, fromList $ map Edge $ s:ss), to 1) ] --r2n $ foldl Union (Symbol s) (map Symbol ss)
63 | r2n (MultiUnion []) = r2n Epsilon
64 | r2n (MultiUnion (r:rs)) = r2n $ foldl Union r rs
65 | r2n (Literal ss) = list2nfa $ map (\(s,i) -> (to i, (False, singleton $ Edge s), to $ i + 1)) (zip ss [0..length ss - 1])
66 | r2n (NotClass []) = list2nfa $ [ (to 0, (True, fromList []), to 1) ] -- Not nothing = everything
67 | r2n (NotClass (s:ss)) = list2nfa $ [ (to 0, (True, fromList $ map Edge $ s:ss), to 1) ]
68 | in r2n r
69 |
70 | -- | Entrypoint for translating a regular expression into an 'NFA' with integer indices.
71 | regex2nfa :: (Hashable s, Ord s) => Regex s -> NFA s Int
72 | regex2nfa = regex2nfa' id id
73 |
74 | -- | Entrypoint for translating a regular expression into a 'DFA.DFA' with integer indices.
75 | regex2dfa :: (Hashable s, Ord s) => Regex s -> DFA.DFA s (DFAState Int)
76 | regex2dfa = nfa2dfa . regex2nfa
77 |
78 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Lex/Tokenizer.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ScopedTypeVariables, DeriveGeneric, DeriveAnyClass
2 | , OverloadedStrings #-}
3 | {-|
4 | Module : Text.ANTLR.Lex.Tokenizer
5 | Description : Tokenization algorithms
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | -}
13 | module Text.ANTLR.Lex.Tokenizer where
14 | import Text.ANTLR.Lex.Automata
15 | import Text.ANTLR.Lex.DFA
16 |
17 | import qualified Text.ANTLR.Set as Set
18 | import Text.ANTLR.Set (Hashable, member, Generic(..), Set(..))
19 |
20 | import Text.ANTLR.Pretty
21 | import qualified Debug.Trace as D
22 | import Data.List (find)
23 | import qualified Data.Text as T
24 |
25 | -- | Token with names @n@, values @v@, and number of input symbols consumed to match
26 | -- it.
27 | data Token n v =
28 | Token n v Int -- ^ Tokenized a token
29 | | EOF -- ^ The end-of-file token
30 | | Error T.Text -- ^ Error encountered while tokenizing
31 | deriving (Show, Ord, Generic, Hashable)
32 |
33 | instance (Prettify n, Prettify v) => Prettify (Token n v) where
34 | prettify EOF = pStr "EOF"
35 | prettify (Error s) = pStr "Token Error: " >> pStr s
36 | prettify (Token n v i) =
37 | prettify v
38 |
39 | instance Eq n => Eq (Token n v) where
40 | Token s _ _ == Token s1 _ _ = s == s1
41 | EOF == EOF = True
42 | Error s == Error s1 = s == s1
43 | _ == _ = False
44 |
45 | -- | Token Names are Input Symbols to the parser.
46 | tokenName :: Token n v -> n
47 | tokenName (Token n v _) = n
48 |
49 | -- | Get the value of a token, ignoring its name.
50 | tokenValue :: Token n v -> v
51 | tokenValue (Token n v _) = v
52 |
53 | -- | Get the number of characters from the input that this token matched on.
54 | tokenSize :: Token n v -> Int
55 | tokenSize (Token _ _ i) = i
56 | tokenSize EOF = 0
57 |
58 | -- | A Lexeme is a sequence of zero or more (matched) input symbols
59 | type Lexeme s = [s]
60 |
61 | -- | A named DFA over symbols @s@, indices @i@, and names @n@.
62 | type NDFA s i n = (n, DFA s i)
63 |
64 | -- | Entrypoint for tokenizing an input stream given a list of named DFAs that
65 | -- we can match on.
66 | --
67 | -- > @dfaTuples@: converts from DFAs to the names associated with them in
68 | -- the specification of the lexer.
69 | --
70 | -- > @fncn@: function for constructing the value of a token from the lexeme
71 | -- matched (e.g. @varName@) and the associated token name (e.g. @id@)
72 | --
73 | tokenize ::
74 | forall s i n v. (Eq i, Ord s, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s)
75 | => [(n, DFA s i)] -- ^ Association list of named DFAs.
76 | -> (Lexeme s -> n -> v) -- ^ Constructs the value of a token from lexeme matched.
77 | -> [s] -- ^ The input string.
78 | -> [Token n v] -- ^ The tokenized tokens.
79 | tokenize dfaTuples fncn input0 = let
80 |
81 | dfas0 = map snd dfaTuples
82 |
83 | allTok :: [(NDFA s i n, State i)] -> [s] -> [Token n v]
84 | allTok dfaSims0 currInput = let
85 | oneTok :: [(NDFA s i n, State i)] -> [s] -> Maybe (Lexeme s, NDFA s i n)
86 | oneTok dfaSims [] = Nothing
87 | oneTok [] ss = Nothing
88 | oneTok dfaSims (s:ss) = let
89 | dfaSims' =
90 | [ ((n, dfa), stop)
91 | | ((n, dfa), cursor) <- dfaSims
92 | , (start, es, stop) <- Set.toList $ _Δ dfa
93 | , start == cursor && s `edgeMember` es ]
94 |
95 | accepting = [ (n,dfa) | ((n, dfa), cursor) <- dfaSims', cursor `member` _F dfa ]
96 |
97 | in (case (oneTok dfaSims' ss, accepting) of
98 | (Nothing, []) -> Nothing
99 | (Nothing, d:ds) -> Just ([s], d)
100 | (Just (l,d), _) -> Just (s:l, d))
101 | in case (currInput, oneTok dfaSims0 currInput) of
102 | ([], _) -> [EOF]
103 | (ss, Nothing) -> [Error $ T.pack $ show ss]
104 | (ss, Just (l, (name,d))) ->
105 | Token name (fncn l name) (length l)
106 | : allTok dfaSims0 (drop (length l) currInput)
107 | in allTok (zip dfaTuples (map s0 dfas0)) input0
108 |
109 | -- | Incremental tokenizer takes in the same list of DFAs and AST value
110 | -- constructor function, but instead returns an incremental tokenizer function
111 | -- that expects a set of names that we currently expect to tokenize on,
112 | -- the current input stream, and returns a single tokenized token along
113 | -- with the modified input stream to iteratively call 'tokenizeInc' on.
114 | tokenizeInc
115 | :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n)
116 | => (n -> Bool) -- ^ Function that returns True on DFA names we wish to filter __out__ of the results.
117 | -> [(n, DFA s i)] -- ^ Closure over association list of named DFAs.
118 | -> (Lexeme s -> n -> v) -- ^ Token value constructor from lexemes.
119 | -> (Set n -> [s] -> (Token n v, [s])) -- ^ The incremental tokenizer closure.
120 | tokenizeInc filterF dfaTuples fncn = let
121 |
122 | tI :: Set n -> [s] -> (Token n v, [s])
123 | tI ns input = let
124 |
125 | dfaTuples' = filter (\(n,_) -> n `Set.member` ns || filterF n) dfaTuples
126 | tokenized = tokenize dfaTuples' fncn input
127 |
128 | filterF' (Token n _ _) = filterF n
129 | filterF' _ = False
130 |
131 | ignored = takeWhile filterF' tokenized
132 | nextTokens = dropWhile filterF' tokenized
133 | -- Yayy lazy function evaluation.
134 | next = case nextTokens of
135 | [] -> EOF
136 | (t:_) -> t --D.traceShowId t
137 |
138 | in (next, drop (sum $ map tokenSize $ next : ignored) input)
139 | in tI
140 |
141 | tokenizeIncAll
142 | :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n)
143 | => (n -> Bool) -- ^ Function that returns True on DFA names we wish to filter __out__ of the results.
144 | -> [(n, DFA s i)]
145 | -> (Lexeme s -> n -> v)
146 | -> (Set n -> [s] -> [(Token n v, [s])])
147 | tokenizeIncAll filterF dfaTuples fncn = let
148 |
149 | tI :: Set n -> [s] -> [(Token n v, [s])]
150 | tI ns input = let
151 |
152 | dfaTuples' = filter (\(n,_) -> n `Set.member` ns || filterF n) dfaTuples
153 | tokenized = tokenize dfaTuples' fncn input
154 |
155 | filterF' (Token n _ _) = filterF n
156 | filterF' _ = False
157 |
158 | ignored = takeWhile filterF' tokenized
159 | nextTokens = dropWhile filterF' tokenized
160 | -- Yayy lazy function evaluation.
161 | next = case nextTokens of
162 | [] -> EOF
163 | (t:_) -> t --D.traceShowId t
164 |
165 | input' = drop (sum $ map tokenSize $ next : ignored) input
166 |
167 | ns' = case next of
168 | Token n _ _ -> n `Set.delete` ns
169 | _ -> Set.empty
170 |
171 | in (next, input') : tI ns' input'
172 | in tI
173 |
174 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/MultiMap.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, MonadComprehensions, DeriveLift,
2 | DeriveDataTypeable #-}
3 | {-|
4 | Module : Text.ANTLR.MultiMap
5 | Description : A one-to-many key value map
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | -}
13 | module Text.ANTLR.MultiMap where
14 | import qualified Data.Map.Strict as M
15 | import Data.Maybe (fromMaybe)
16 | import Text.ANTLR.Set (Generic(..), Hashable(..), Set(..))
17 | import qualified Text.ANTLR.Set as S
18 | import Prelude hiding (lookup)
19 | import Text.ANTLR.Pretty
20 |
21 | import Data.Data (Data(..))
22 | import Language.Haskell.TH.Syntax (Lift(..))
23 |
24 | instance (Lift k, Lift v, Data k, Data v, Ord k, Ord v) => Lift (M.Map k v)
25 |
26 | -- | A multi 'Map' is a mapping from keys @k@ to sets of values @v@. A nice
27 | -- invariant to maintain while using a multi-map is to never have empty
28 | -- sets mapped to by some key.
29 | newtype Map k v = Map (M.Map k (Set v))
30 | deriving (Generic, Hashable, Eq, Show, Lift)
31 |
32 | instance (Prettify k, Prettify v, Hashable v, Eq v) => Prettify (Map k v) where
33 | prettify (Map m) = prettify m
34 |
35 | -- | The singleton multimap, given a single key and a __single__ value.
36 | singleton :: (Hashable v, Eq v) => k -> v -> Map k v
37 | singleton k v = Map (M.singleton k (S.singleton v))
38 |
39 | -- | Construct a multi 'Map' from a list of key-value pairs.
40 | fromList :: (Hashable v, Ord k, Eq k, Eq v) => [(k, v)] -> Map k v
41 | fromList kvs = Map (M.fromList
42 | [ (k1, S.fromList [v2 | (k2, v2) <- kvs, k1 == k2])
43 | | (k1, _) <- kvs])
44 |
45 | -- | Same as 'fromList' but where the values in the key-value tuples are already in sets.
46 | fromList' :: (Ord k, Eq k, Hashable v, Eq v) => [(k, Set v)] -> Map k v
47 | fromList' kvs = fromList [(k, v) | (k, vs) <- kvs, v <- S.toList vs]
48 |
49 | -- | Inverse of 'fromList\''.
50 | toList :: Map k v -> [(k, Set v)]
51 | toList (Map m) = M.toList m
52 |
53 | -- | Take the union of two maps.
54 | union :: (Ord k, Eq k, Hashable v, Eq v) => Map k v -> Map k v -> Map k v
55 | union m1 m2 = fromList' (toList m1 ++ toList m2)
56 |
57 | -- | The empty multi-map.
58 | empty :: Map k v
59 | empty = Map M.empty
60 |
61 | -- | Get the set of values mapped to by some key @k@.
62 | lookup :: (Ord k, Hashable v, Eq v) => k -> Map k v -> Set v
63 | lookup k (Map m) = fromMaybe S.empty (M.lookup k m)
64 |
65 | -- | Number of keys in the multi-map.
66 | size (Map m) = M.size m
67 |
68 | -- | Map difference of two multi-maps, deleting individual key-value pairs
69 | -- rather than deleting the entire key. Invariant maintained is that
70 | -- input maps with non-null value sets will result in an output with
71 | -- non-null value sets.
72 | difference (Map m1) m2 = Map $ M.fromList
73 | [ (k1, vs)
74 | | (k1, vs1) <- M.toList m1
75 | , let vs2 = k1 `lookup` m2
76 | , let vs = vs1 `S.difference` vs2
77 | , (not . S.null) vs
78 | ]
79 |
80 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, FlexibleContexts, InstanceSigs
2 | , UndecidableInstances, StandaloneDeriving, TypeFamilies
3 | , ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses
4 | , OverloadedStrings, DeriveDataTypeable, ConstraintKinds #-}
5 | {-|
6 | Module : Text.ANTLR.Parser
7 | Description : Parsing API for constructing Haskell data types from lists of tokens
8 | Copyright : (c) Karl Cronburg, 2018
9 | License : BSD3
10 | Maintainer : karl@cs.tufts.edu
11 | Stability : experimental
12 | Portability : POSIX
13 |
14 | -}
15 | module Text.ANTLR.Parser where
16 | import Text.ANTLR.Grammar hiding (Action)
17 | import Text.ANTLR.Pretty
18 | import Text.ANTLR.Set (Generic(..))
19 | import Text.ANTLR.Lex.Tokenizer (Token(..))
20 | import Data.Data (Data(..))
21 | import Language.Haskell.TH.Lift (Lift(..))
22 | import Text.ANTLR.Set (Hashable)
23 |
24 | -- | Nonterminals in a grammar are tabular, terminal symbols are tabular (as are
25 | -- the EOF-stripped version), terminals are referenceable (can be symbolized),
26 | -- and terminals are also tabular.
27 | type CanParse nts t =
28 | ( Tabular nts
29 | , Tabular (Sym t)
30 | , Tabular (StripEOF (Sym t))
31 | , HasEOF (Sym t)
32 | , Ref t
33 | , Tabular t)
34 |
35 | -- | Same as 'CanParse' but with second formal parameter representing (StripEOF (Sym t))
36 | -- aka "sts" (stripped terminal symbol).
37 | type CanParse' nts sts = ( Tabular nts, Tabular sts )
38 |
39 | type IsAST ast = ( Ord ast, Eq ast, Hashable ast )
40 |
41 | type IsState st = ( Ord st, Hashable st, Prettify st )
42 | type Tabular sym = ( Ord sym, Hashable sym, Prettify sym, Eq sym )
43 |
44 | -- | Action functions triggered during parsing are given the nonterminal we just matched on, the
45 | -- corresponding list of production elements (grammar symbols) in the RHS of the matched production
46 | -- alternative, and the result of recursively.
47 | --
48 | -- A 'ParseEvent' may also be just a terminal matched on, or an epsilon event
49 | -- based heavily on which parsing algorithm is being run.
50 | --
51 | -- __This__ data type is one of the data types that tie together terminal (token) types
52 | -- and terminal symbol types. When the parser produces a terminal event, you're
53 | -- seeing a __token__, but when the parser produces a nonterminal event, you're
54 | -- seeing a production in the grammar firing which contains terminal __symbols__,
55 | -- not tokens.
56 | data ParseEvent ast nts t =
57 | TermE t -- ^ A terminal was seen in the input
58 | | NonTE (nts, ProdElems nts (StripEOF (Sym t)), [ast]) -- ^ A non-terminal was seen in the input
59 | | EpsE -- ^ Epsilon event
60 |
61 | deriving instance (Show ast, Show nts, Show (StripEOF (Sym t)), Show t) => Show (ParseEvent ast nts t)
62 |
63 | instance (Prettify ast, Prettify nts, Prettify (StripEOF (Sym t)), Prettify t) => Prettify (ParseEvent ast nts t) where
64 | prettify e = do
65 | pStr "Terminal Event: "
66 | incrIndent 2
67 | prettify e
68 | incrIndent (-2)
69 |
70 | -- | An Action as seen by the host language (Haskell) is a function from parse
71 | -- events to an abstract-syntax tree that the function constructs based on which
72 | -- non-terminal or terminal symbol was seen.
73 | type Action ast nts t = ParseEvent ast nts t -> ast
74 |
75 | -- | An Icon (as used in first and follow sets of the LL1 parser and the
76 | -- shift-reduce table of the LR1 parser) is just a terminal symbol taken from
77 | -- the grammar, or it's an epsilon or EOF.
78 | data Icon ts =
79 | Icon ts -- ^ Terminal symbol icon
80 | | IconEps -- ^ Epsilon icon
81 | | IconEOF -- ^ EOF (end of file / input) icon
82 | deriving (Generic, Hashable, Show, Eq, Ord, Data, Lift)
83 |
84 | -- | __This__ is the function defining the (n == Sym t == ts) relationship between
85 | -- the __name__ type of a token, the __symbol__ type of a terminal token (as
86 | -- constructed by the tokenizer), and the __terminal symbol__ type as used by the
87 | -- parser. When a parser wants to compare the symbol of an input token to a
88 | -- terminal symbol found in the grammar, it should convert the token to an icon
89 | -- using this function and then compare icons using Eq because icons throw away
90 | -- the value of a token, leaving only the Eq-able piece that we care about.
91 | token2symbol :: Token n v -> TokenSymbol n
92 | token2symbol (Token n v _) = TokenSymbol n
93 | token2symbol EOF = EOFSymbol
94 | token2symbol (Error s) = EOFSymbol
95 |
96 | -- | Tokens are symbolized by an icon containing their name.
97 | instance Ref (Token n v) where
98 | type Sym (Token n v) = TokenSymbol n
99 | getSymbol = token2symbol
100 |
101 | -- | The symbol for some tokenize is either just it's name @n@ or the special EOF symbol.
102 | data TokenSymbol n =
103 | TokenSymbol n -- ^ Named symbol
104 | | EOFSymbol -- ^ End-of-file symbol
105 | deriving (Eq, Ord, Show, Hashable, Generic)
106 |
107 | instance (Prettify n) => Prettify (TokenSymbol n) where
108 | prettify (TokenSymbol n) = do
109 | pStr "TokenSymbol "
110 | prettify n
111 | prettify EOFSymbol = pStr "EOFSymbol"
112 |
113 | -- | A data type with an EOF constructor. There are two things you can do with a
114 | -- data type that has an EOF:
115 | --
116 | -- > Ask for the type *without* the EOF at compile time
117 | -- > Ask whether or not an instance is the EOF symbol at runtime
118 | --
119 | class HasEOF t where
120 | -- | The unwrapped type (without the EOF data constructor alternative)
121 | type StripEOF t :: *
122 | -- | Whether or not the given value of type t is the EOF value
123 | isEOF :: t -> Bool
124 | -- | Take a token and try to unwrap its name (an EOF should result in Nothing)
125 | stripEOF :: t -> Maybe (StripEOF t)
126 |
127 | instance HasEOF (TokenSymbol n) where
128 | type StripEOF (TokenSymbol n) = n
129 |
130 | isEOF EOFSymbol = True
131 | isEOF _ = False
132 |
133 | stripEOF EOFSymbol = Nothing
134 | stripEOF (TokenSymbol n) = Just n
135 |
136 | instance HasEOF String where
137 | type StripEOF String = String
138 |
139 | isEOF "" = True
140 | isEOF _ = False
141 |
142 | stripEOF "" = Nothing
143 | stripEOF x = Just x
144 |
145 | instance (Prettify ts) => Prettify (Icon ts) where
146 | prettify IconEps = pStr "iϵ"
147 | prettify IconEOF = pStr "iEOF"
148 | prettify (Icon ts) = do
149 | pStr "Icon "
150 | prettify ts
151 |
152 | -- | Is this a terminal-symbol icon?
153 | isIcon Icon{} = True
154 | isIcon _ = False
155 |
156 | -- | Is this an epsilon icon?
157 | isIconEps IconEps = True
158 | isIconEps _ = False
159 |
160 | -- | Is this the EOF icon?
161 | isIconEOF IconEOF = True
162 | isIconEOF _ = False
163 |
164 | -- | Universal Abstract Syntax Tree data type. All internal AST "nodes" have a
165 | -- nonterminal, the grammar production symbols it reduced from, and the
166 | -- resulting recursively defined AST nodes acquired from the parser. Leaf AST
167 | -- nodes can be either an epsilon (when explicit epsilons are used in the
168 | -- grammar) or more importantly a terminal symbol.
169 | -- __This__ is another type that defines the relationship between the terminal
170 | -- token type @t@ and the terminal symbol type @(ts == Sym t)@ where the AST tells
171 | -- you the production rule that fired containing @ts@ as well as the tokens @t@
172 | -- contained in leaves of the AST.
173 | data AST nts t =
174 | LeafEps -- ^ Epsilon leaf AST node
175 | | Leaf t -- ^ Terminal token leaf in the AST
176 | | AST nts (ProdElems nts (StripEOF (Sym t))) [AST nts t] -- ^ Internal AST node
177 | deriving (Generic)
178 |
179 | deriving instance (Eq (StripEOF (Sym t)), Eq nts, Eq t) => Eq (AST nts t)
180 | deriving instance (Ord (StripEOF (Sym t)), Ord nts, Ord t) => Ord (AST nts t)
181 | deriving instance (Show (StripEOF (Sym t)), Show nts, Show t) => Show (AST nts t)
182 | deriving instance (Hashable (StripEOF (Sym t)), Hashable nts, Hashable t) => Hashable (AST nts t)
183 |
184 | instance (Prettify nts, Prettify t) => Prettify (AST nts t) where
185 | prettify LeafEps = pStr "ϵ"
186 | prettify (Leaf t) = prettify t
187 | prettify (AST nts ps asts) = do
188 | prettify nts
189 | pStr "{"
190 | prettify asts
191 | pStr "}"
192 |
193 | -- | Default AST-constructor function which just copies over the contents of
194 | -- some parse event into an 'AST'.
195 | event2ast :: ParseEvent (AST nts t) nts t -> AST nts t
196 | event2ast (TermE t) = Leaf t
197 | event2ast (NonTE (nts, ss, asts)) = AST nts ss asts
198 |
199 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Pretty.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FlexibleInstances, DefaultSignatures, UndecidableInstances
2 | , OverloadedStrings #-}
3 | {-|
4 | Module : Text.ANTLR.Pretty
5 | Description : A pretty-printing type class to be used across antlr-haskell modules
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | I want to have something like Show whereby every time I add a new type to the
13 | system, I can implement a function that gets called by existing code which
14 | happens to have types that get parametrized by that type. I don't want to
15 | modify an existing file / centralizing all of the types in my system into a
16 | single file makes little sense because then that one file becomes a hub /
17 | single point of failure.
18 |
19 | * I need a typeclass (no modifying existing files, but they need to call my
20 | new code without passing around a new show function)
21 |
22 | * The prettify function of that typeclass needs to return a state monad so
23 | that recursive calls keep the state
24 |
25 | * A pshow function needs to evalState on the prettify function with an
26 | initial indentation of zero (along with any other future state values...)
27 |
28 | -}
29 | module Text.ANTLR.Pretty where
30 | import Control.Monad.Trans.State.Lazy
31 | import qualified Data.Map.Strict as M
32 | import Data.Data (toConstr, Data(..))
33 |
34 | import qualified Data.Text as T
35 |
36 | -- | Pretty-printing state
37 | data PState = PState
38 | { indent :: Int -- ^ current indentation level
39 | , vis_chrs :: Int -- ^ number of visible characters consumed so far
40 | , str :: T.Text -- ^ the string, 'T.Text', that we've constructed so far
41 | , columns_soft :: Int -- ^ soft limit on number of columns to consume per row
42 | , columns_hard :: Int -- ^ hard limit on number of columns to consume per row
43 | , curr_col :: Int -- ^ column number we're on in the current row of 'str'
44 | , curr_row :: Int -- ^ number of rows (newlines) we've printed to 'str'
45 | }
46 |
47 | -- | The pretty state monad
48 | type PrettyM val = State PState val
49 |
50 | -- | No value being threaded through the monad (because result is in 'str')
51 | type Pretty = PrettyM ()
52 |
53 | -- | Define the 'Prettify' type class for your pretty-printable type @t@.
54 | class Prettify t where
55 | {-# MINIMAL prettify #-}
56 |
57 | -- | Defines how to pretty-print some type.
58 | prettify :: t -> Pretty
59 | default prettify :: (Show t) => t -> Pretty
60 | prettify = rshow
61 |
62 | -- | Lists are pretty-printed specially.
63 | prettifyList :: [t] -> Pretty
64 | prettifyList = prettifyList_
65 |
66 | -- | Initial Pretty state with safe soft and hard column defaults.
67 | initPState = PState
68 | { indent = 0 -- Indentation level
69 | , vis_chrs = 0 -- Number of visible characters consumed.
70 | , str = T.empty -- The string
71 | , columns_soft = 100 -- Soft limit on terminal width.
72 | , columns_hard = 120 -- Hard limit on terminal width.
73 | , curr_col = 0 -- Column position in the current row.
74 | , curr_row = 0 -- Number of newlines seen
75 | }
76 |
77 | -- | Prettify a string by putting it on the end of the current string state
78 | pLine :: T.Text -> Pretty
79 | pLine s = do
80 | pStr s
81 | _pNewLine
82 |
83 | -- | Pretty print a literal string by just printing the string.
84 | pStr' :: String -> Pretty
85 | pStr' = pStr . T.pack
86 |
87 | -- | This currently assumes all input strings contain no newlines, and that this is
88 | -- only called on relatively small strings because strings running over the end
89 | -- of the hard column limit get dumped onto the next line __no matter what__.
90 | -- T.Texts can run over the soft limit, but hitting the soft limit after a call
91 | -- to 'pStr' forces a newline.
92 | pStr :: T.Text -> Pretty
93 | pStr s = do
94 | pstate <- get
95 | _doIf _pNewLine (T.length s + curr_col pstate > columns_hard pstate && curr_col pstate /= 0)
96 | pstate <- get
97 | _doIf _pIndent (curr_col pstate == 0 && indent pstate > 0)
98 | pstate <- get
99 | put $ pstate
100 | { str = T.append (str pstate) s
101 | , curr_col = (curr_col pstate) + T.length s
102 | }
103 | pstate <- get
104 | _doIf _pNewLine (curr_col pstate > columns_soft pstate)
105 |
106 | -- | Print a single character to the output.
107 | pChr :: Char -> Pretty
108 | pChr c = pStr $ T.singleton c
109 |
110 | -- | Gets rid of if-then-else lines in the Pretty monad code:
111 | _doIf fncn True = fncn
112 | _doIf fncn False = return ()
113 |
114 | -- | Indent by the number of spaces specified in the state.
115 | _pIndent :: Pretty
116 | _pIndent = do
117 | pstate <- get
118 | put $ pstate
119 | { str = str pstate `T.append` T.replicate (indent pstate) (T.singleton ' ')
120 | , curr_col = curr_col pstate + indent pstate
121 | , vis_chrs = vis_chrs pstate + indent pstate
122 | }
123 |
124 | -- | Insert a newline
125 | _pNewLine :: Pretty
126 | _pNewLine = do
127 | pstate <- get
128 | put $ pstate
129 | { str = T.snoc (str pstate) '\n'
130 | , curr_col = 0
131 | , curr_row = curr_row pstate + 1
132 | }
133 |
134 | -- | Run the pretty-printer, returning a 'T.Text'.
135 | pshow :: (Prettify t) => t -> T.Text
136 | pshow t = str $ execState (prettify t) initPState
137 |
138 | -- | Run the pretty-printer, returning a 'String'.
139 | pshow' :: (Prettify t) => t -> String
140 | pshow' = T.unpack . pshow
141 |
142 | pshowList :: (Prettify t) => [t] -> T.Text
143 | pshowList t = str $ execState (prettifyList t) initPState
144 |
145 | pshowList' :: (Prettify t) => [t] -> String
146 | pshowList' = T.unpack . pshowList
147 |
148 | -- | Run the pretty-printer with a specific indentation level.
149 | pshowIndent :: (Prettify t) => Int -> t -> T.Text
150 | pshowIndent i t = str $ execState (prettify t) $ initPState { indent = i }
151 |
152 | -- | Plain-vanilla show of something in the 'Pretty' state monad.
153 | rshow :: (Show t) => t -> Pretty
154 | rshow t = do
155 | pstate <- get
156 | let s = show t
157 | put $ pstate
158 | { str = str pstate `T.append` T.pack s
159 | , curr_row = curr_row pstate + (T.length . T.filter (== '\n')) (T.pack s)
160 | , curr_col = curr_col pstate -- TODO
161 | }
162 |
163 | -- | Parenthesize something in 'Pretty'.
164 | pParens fncn = do
165 | pChr '('
166 | fncn
167 | pChr ')'
168 |
169 | -- | Increment the indentation level by modifying the pretty-printer state.
170 | incrIndent :: Int -> Pretty
171 | incrIndent n = do
172 | pstate <- get
173 | put $ pstate { indent = indent pstate + n }
174 |
175 | -- | Like 'incrIndent' but set indentation level instead of incrementing.
176 | setIndent :: Int -> Pretty
177 | setIndent n = do
178 | pstate <- get
179 | put $ pstate { indent = n }
180 |
181 | -- | Prettify the given value and compute the number of characters consumed as a
182 | -- result.
183 | pCount :: (Prettify v) => v -> PrettyM Int
184 | pCount v = do
185 | i0 <- indent <$> get
186 | prettify v
187 | i1 <- indent <$> get
188 | return (i1 - i0)
189 |
190 | -- | Pretty-print a list with one entry per line.
191 | pListLines :: (Prettify v) => [v] -> Pretty
192 | pListLines vs = do
193 | pStr $ T.pack "[ "
194 | col0 <- curr_col <$> get
195 | i0 <- indent <$> get
196 | setIndent (col0 - 2)
197 | sepBy (pLine T.empty >> (pStr $ T.pack ", ")) (map prettify vs)
198 | pLine T.empty >> pChr ']'
199 | setIndent i0 -- Reset indentation back to what it was
200 |
201 | instance (Prettify k, Prettify v) => Prettify (M.Map k v) where
202 | prettify m = do
203 | -- (5 == length of "Map: ") ==> TODO: indentation "discipline"
204 | pStr "Map: "; incrIndent 5
205 | prettify $ M.toList m -- TODO: prettier map
206 | incrIndent (-5)
207 |
208 | instance (Prettify v) => Prettify (Maybe v) where
209 | prettify Nothing = pStr "Nope"
210 | prettify (Just v) = pStr "Yep" >> pParens (prettify v)
211 |
212 | -- | Prettify a list with possibly more than one entry per line.
213 | prettifyList_ [] = pStr "[]"
214 | prettifyList_ vs = do
215 | pChr '['
216 | sepBy (pStr ", ") (map prettify vs)
217 | pChr ']'
218 |
219 | instance (Prettify v) => Prettify [v] where
220 | prettify = prettifyList
221 |
222 | -- TODO: template haskell-ify for larger tuples
223 | instance (Prettify a, Prettify b) => Prettify (a,b) where
224 | prettify (a,b) = do
225 | pChr '('
226 | prettify a
227 | pChr ','
228 | prettify b
229 | pChr ')'
230 |
231 | instance (Prettify a, Prettify b, Prettify c) => Prettify (a,b,c) where
232 | prettify (a,b,c) = do
233 | pChr '('
234 | prettify a
235 | pChr ','
236 | prettify b
237 | pChr ','
238 | prettify c
239 | pChr ')'
240 |
241 | instance (Prettify a, Prettify b, Prettify c, Prettify d) => Prettify (a,b,c,d) where
242 | prettify (a,b,c,d) = do
243 | pChr '('
244 | prettify a
245 | pChr ','
246 | prettify b
247 | pChr ','
248 | prettify c
249 | pChr ','
250 | prettify d
251 | pChr ')'
252 |
253 | -- | Pretty-print a list of values, separated by some other pretty-printer.
254 | sepBy s [] = return ()
255 | sepBy s (v:vs) = foldl (_sepBy s) v vs
256 |
257 | -- | Reorder pretty-printer bind.
258 | _sepBy s ma mb = ma >> s >> mb
259 |
260 | instance Prettify Char where
261 | prettify = pChr
262 | prettifyList = pStr . T.pack
263 |
264 | instance Prettify () where prettify = rshow
265 | instance Prettify Bool where prettify = rshow
266 | instance Prettify Int where prettify = rshow
267 |
268 | instance Prettify Double where prettify = rshow
269 |
270 |
--------------------------------------------------------------------------------
/src/Text/ANTLR/Set.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE GADTs, DeriveAnyClass, DeriveGeneric, OverloadedStrings, DeriveLift
2 | , QuasiQuotes, TemplateHaskell, DeriveDataTypeable #-}
3 | {-|
4 | Module : Text.ANTLR.Set
5 | Description : Entrypoint for swapping out different underlying set representations
6 | Copyright : (c) Karl Cronburg, 2018
7 | License : BSD3
8 | Maintainer : karl@cs.tufts.edu
9 | Stability : experimental
10 | Portability : POSIX
11 |
12 | -}
13 | module Text.ANTLR.Set
14 | ( Set, null, size, member, notMember
15 | , empty, singleton, insert, delete, union, unions
16 | , difference, intersection, filter, map, foldr, foldl', fold
17 | , toList, fromList, (\\), findMin, maybeMin
18 | , Hashable(..), Generic(..)
19 | ) where
20 | import Text.ANTLR.Pretty
21 |
22 | import GHC.Generics (Generic, Rep)
23 | import Data.Hashable (Hashable(..))
24 | import Language.Haskell.TH.Syntax (Lift(..))
25 |
26 | import qualified Data.Functor as F
27 | import qualified Control.Applicative as A
28 | import qualified Data.Foldable as Foldable
29 |
30 | import Data.Map ( Map(..) )
31 | import qualified Data.Map as M
32 |
33 | import qualified Data.HashSet as S
34 | import Data.HashSet as S
35 | ( HashSet(..), member, toList, union
36 | , null, empty, map, size, singleton, insert
37 | , delete, unions, difference, intersection, foldl'
38 | , fromList
39 | )
40 |
41 | import Prelude hiding (null, filter, map, foldr, foldl)
42 |
43 | -- | Use a hash-based set (hashable keys) for our internal set representation
44 | -- during parsing.
45 | type Set = S.HashSet
46 |
47 | -- | Is @e@ not a member of the set @s@.
48 | notMember e s = not $ member e s
49 |
50 | -- | Set fold
51 | fold = S.foldr
52 |
53 | -- | Set fold
54 | foldr = S.foldr
55 |
56 | -- | Find the minimum value of an orderable set.
57 | findMin :: (Ord a, Hashable a) => Set a -> a
58 | findMin = minimum . toList
59 |
60 | --maybeMin :: (Ord a, Hashable a) => Set a -> Maybe a
61 | -- | Get minimum of a set without erroring out on empty set.
62 | maybeMin as
63 | | S.size as == 0 = Nothing
64 | | otherwise = Just $ findMin as
65 |
66 | infixl 9 \\
67 |
68 | -- | Set difference
69 | (\\) :: (Hashable a, Eq a) => Set a -> Set a -> Set a
70 | m1 \\ m2 = difference m1 m2
71 |
72 | instance (Hashable a, Eq a, Lift a) => Lift (S.HashSet a) where
73 | lift set = [| fromList $(lift $ toList set) |]
74 |
75 | instance (Hashable k, Hashable v) => Hashable (Map k v) where
76 | hashWithSalt salt mp = salt `hashWithSalt` M.toList mp
77 |
78 | instance (Prettify a, Hashable a, Eq a) => Prettify (S.HashSet a) where
79 | prettify s = do
80 | pStr "Set: "; incrIndent 5
81 | pListLines $ toList s
82 | incrIndent (-5)
83 | pLine ""
84 |
85 | --filter :: (Hashable a, Eq a) => (a -> Bool) -> Set a -> Set a
86 | -- | Set filter
87 | filter f s = S.filter f s
88 |
89 | --instance (Hashable a, Eq a) => Hashable (S.HashSet a) where
90 | -- hashWithSalt salt set = salt `hashWithSalt` S.toList (run set)
91 |
92 |
93 | {-
94 |
95 | --import Data.Set.Monad (Set(..), member, toList, union, notMember)
96 | --import qualified Data.Set.Monad as Set
97 |
98 | import Prelude hiding (null, filter, map, foldr, foldl)
99 | import qualified Data.List as L
100 | --import qualified Data.Set as S
101 | import qualified Data.Functor as F
102 | import qualified Control.Applicative as A
103 | import qualified Data.Foldable as Foldable
104 |
105 | import Data.Monoid
106 | import Data.Foldable (Foldable)
107 | import Control.Arrow
108 | import Control.Monad
109 | import Control.DeepSeq
110 |
111 | import Data.Hashable (Hashable(..))
112 | import GHC.Generics (Generic, Rep)
113 | import Control.DeepSeq (NFData(..))
114 | import Language.Haskell.TH.Syntax (Lift(..))
115 | import Data.Data (Data(..))
116 |
117 | import Data.Map ( Map(..) )
118 | import qualified Data.Map as M
119 |
120 | import Text.ANTLR.Pretty
121 |
122 | instance (Hashable k, Hashable v) => Hashable (Map k v) where
123 | hashWithSalt salt mp = salt `hashWithSalt` M.toList mp
124 |
125 | instance (Hashable a, Eq a) => Hashable (Set a) where
126 | hashWithSalt salt set = salt `hashWithSalt` S.toList (run set)
127 |
128 | instance (Hashable a, Ord a) => Ord (Set a) where
129 | s1 <= s2 = S.toList (run s1) <= S.toList (run s2)
130 |
131 | data Set a where
132 | Prim :: (Hashable a, Eq a) => S.HashSet a -> Set a
133 | Return :: a -> Set a
134 | Bind :: Set a -> (a -> Set b) -> Set b
135 | Zero :: Set a
136 | Plus :: Set a -> Set a -> Set a
137 |
138 | instance (Data a) => Data (Set a)
139 |
140 | instance (Hashable a, Eq a, Lift a) => Lift (Set a) where
141 | lift set = [| fromList $(lift $ toList set) |]
142 |
143 | run :: (Hashable a, Eq a) => Set a -> S.HashSet a
144 | run (Prim s) = s
145 | run (Return a) = S.singleton a
146 | run (Zero) = S.empty
147 | run (Plus ma mb) = run ma `S.union` run mb
148 | run (Bind (Prim s) f) = S.foldl' S.union S.empty (S.map (run . f) s)
149 | run (Bind (Return a) f) = run (f a)
150 | run (Bind Zero _) = S.empty
151 | run (Bind (Plus (Prim s) ma) f) = run (Bind (Prim (s `S.union` run ma)) f)
152 | run (Bind (Plus ma (Prim s)) f) = run (Bind (Prim (run ma `S.union` s)) f)
153 | run (Bind (Plus (Return a) ma) f) = run (Plus (f a) (Bind ma f))
154 | run (Bind (Plus ma (Return a)) f) = run (Plus (Bind ma f) (f a))
155 | run (Bind (Plus Zero ma) f) = run (Bind ma f)
156 | run (Bind (Plus ma Zero) f) = run (Bind ma f)
157 | run (Bind (Plus (Plus ma mb) mc) f) = run (Bind (Plus ma (Plus mb mc)) f)
158 | run (Bind (Plus ma mb) f) = run (Plus (Bind ma f) (Bind mb f))
159 | run (Bind (Bind ma f) g) = run (Bind ma (\a -> Bind (f a) g))
160 |
161 | instance F.Functor Set where
162 | fmap = liftM
163 |
164 | instance A.Applicative Set where
165 | pure = return
166 | (<*>) = ap
167 |
168 | instance A.Alternative Set where
169 | empty = Zero
170 | (<|>) = Plus
171 |
172 | instance Monad Set where
173 | return = Return
174 | (>>=) = Bind
175 |
176 | instance MonadPlus Set where
177 | mzero = Zero
178 | mplus = Plus
179 |
180 | instance (Hashable a, Eq a) => Monoid (Set a) where
181 | mempty = empty
182 | mappend = union
183 | mconcat = unions
184 |
185 | instance Foldable Set where
186 | foldr f def m =
187 | case m of
188 | Prim s -> S.foldr f def s
189 | Return a -> f a def
190 | Zero -> def
191 | Plus ma mb -> Foldable.foldr f (Foldable.foldr f def ma) mb
192 | Bind s g -> Foldable.foldr f' def s
193 | where f' x b = Foldable.foldr f b (g x)
194 |
195 | instance (Hashable a, Eq a) => Eq (Set a) where
196 | s1 == s2 = run s1 == run s2
197 |
198 | --instance (Hashable a, Eq a, Ord a) => Ord (Set a) where
199 | -- compare s1 s2 = compare (run s1) (run s2)
200 |
201 | instance (Show a, Hashable a, Eq a) => Show (Set a) where
202 | show = show . run
203 |
204 | instance (Prettify a, Hashable a, Eq a) => Prettify (Set a) where
205 | prettify s = do
206 | pStr "Set: "; incrIndent 5
207 | pListLines $ toList s
208 | incrIndent (-5)
209 | pLine ""
210 |
211 | instance (Read a, Hashable a, Eq a) => Read (Set a) where
212 | readsPrec i s = L.map (first Prim) (readsPrec i s)
213 |
214 | instance (NFData a, Hashable a, Eq a) => NFData (Set a) where
215 | rnf = rnf . run
216 |
217 | infixl 9 \\
218 |
219 | (\\) :: (Hashable a, Eq a) => Set a -> Set a -> Set a
220 | m1 \\ m2 = difference m1 m2
221 |
222 | null :: (Hashable a, Eq a) => Set a -> Bool
223 | null = S.null . run
224 |
225 | size :: (Hashable a, Eq a) => Set a -> Int
226 | size = S.size . run
227 |
228 | member :: (Hashable a, Eq a) => a -> Set a -> Bool
229 | member a s = S.member a (run s)
230 |
231 | notMember :: (Hashable a, Eq a) => a -> Set a -> Bool
232 | notMember a t = not (member a t)
233 |
234 | empty :: (Hashable a, Eq a) => Set a
235 | empty = Prim S.empty
236 |
237 | singleton :: (Hashable a, Eq a) => a -> Set a
238 | singleton a = Prim (S.singleton a)
239 |
240 | insert :: (Hashable a, Eq a) => a -> Set a -> Set a
241 | insert a s = Prim (S.insert a (run s))
242 |
243 | delete :: (Hashable a, Eq a) => a -> Set a -> Set a
244 | delete a s = Prim (S.delete a (run s))
245 |
246 | union :: (Hashable a, Eq a) => Set a -> Set a -> Set a
247 | union s1 s2 = Prim (run s1 `S.union` run s2)
248 |
249 | unions :: (Hashable a, Eq a) => [Set a] -> Set a
250 | unions ss = Prim (S.unions (L.map run ss))
251 |
252 | difference :: (Hashable a, Eq a) => Set a -> Set a -> Set a
253 | difference s1 s2 = Prim (S.difference (run s1) (run s2))
254 |
255 | intersection :: (Hashable a, Eq a) => Set a -> Set a -> Set a
256 | intersection s1 s2 = Prim (S.intersection (run s1) (run s2))
257 |
258 | filter :: (Hashable a, Eq a) => (a -> Bool) -> Set a -> Set a
259 | filter f s = Prim (S.filter f (run s))
260 |
261 | map :: (Hashable a, Eq a, Hashable b, Eq b) => (a -> b) -> Set a -> Set b
262 | map f s = Prim (S.map f (run s))
263 |
264 | foldr :: (Hashable a, Eq a) => (a -> b -> b) -> b -> Set a -> b
265 | foldr f z s = S.foldr f z (run s)
266 |
267 | fold :: (Hashable a, Eq a) => (a -> b -> b) -> b -> Set a -> b
268 | fold f z s = S.foldr f z (run s)
269 |
270 | foldl' :: (Hashable a, Eq a) => (b -> a -> b) -> b -> Set a -> b
271 | foldl' f z s = S.foldl' f z (run s)
272 |
273 | toList :: (Hashable a, Eq a) => Set a -> [a]
274 | toList = S.toList . run
275 |
276 | fromList :: (Hashable a, Eq a) => [a] -> Set a
277 | fromList as = Prim (S.fromList as)
278 |
279 | findMin :: (Ord a, Hashable a) => Set a -> a
280 | findMin = minimum . toList
281 |
282 | maybeMin :: (Ord a, Hashable a) => Set a -> Maybe a
283 | maybeMin as
284 | | size as == 0 = Nothing
285 | | otherwise = Just $ findMin as
286 |
287 | -}
288 |
289 |
--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | # resolver: nightly-2019-04-10
2 | resolver: lts-16.15
3 |
4 | packages:
5 | - .
6 |
7 |
--------------------------------------------------------------------------------
/test/GrammarReader/GrammarReader.hs:
--------------------------------------------------------------------------------
1 | import Text.ParserCombinators.Parsec as Parsec
2 | import Data.Char
3 | import ParserGenerator.AllStar
4 |
5 | --Functions for parsing a context-free grammar in BNF form
6 |
7 | grammarFile = endBy production eol
8 |
9 | production =
10 | do l <- nonterminal
11 | skipMany spacesAndTabs
12 | string "::="
13 | skipMany spacesAndTabs
14 | r <- rightSide
15 | return (l, r)
16 |
17 | rightSide = sepBy expansion expansionSep
18 |
19 | expansion = many symbol
20 |
21 | expansionSep =
22 | do skipMany spacesAndTabs
23 | char '|'
24 | skipMany spacesAndTabs
25 |
26 | symbol = nonterminal <|> terminal
27 |
28 | nonterminal = upper
29 |
30 | terminal = lower
31 |
32 | spacesAndTabs = oneOf " \t"
33 |
34 | eol = char '\n'
35 |
36 |
37 | parseGrammar :: String -> Either ParseError [(Char, [String])]
38 | parseGrammar input = Parsec.parse grammarFile "(unknown)" input
39 |
40 |
41 | --Functions for converting a parsed grammar to an ATN environment
42 |
43 | grammarToATNEnv g =
44 | case parseGrammar g of
45 | Left e -> error "could not parse grammar"
46 | Right productions ->
47 | let (nonterminals, _) = unzip productions
48 | in zip nonterminals (map productionToATN productions)
49 |
50 |
51 | productionToATN (nt, expansions) =
52 | map (\(n, exp) -> expansionToATNPath nt exp n) (zip [1..] expansions)
53 |
54 | expansionToATNPath nt exp choiceNum =
55 | let buildMiddleAndFinalStates n symbols =
56 | case symbols of
57 | [] -> [(MIDDLE n, EPS, FINAL nt)]
58 | s : symbols' ->
59 | let edgeLabel = if isUpper s then NT s else T s
60 | in (MIDDLE n, edgeLabel, MIDDLE (n + 1)) : buildMiddleAndFinalStates (n + 1) symbols'
61 | in (INIT nt, EPS, CHOICE nt choiceNum) :
62 | (CHOICE nt choiceNum, EPS, MIDDLE 1) :
63 | buildMiddleAndFinalStates 1 exp
64 |
65 | main =
66 | do g <- getContents
67 | putStrLn (show (grammarToATNEnv g))
68 |
--------------------------------------------------------------------------------
/test/GrammarReader/output/generated_atn_lookup.txt:
--------------------------------------------------------------------------------
1 | [('S',[[(INIT 'S',EPS,CHOICE 'S' 1),(CHOICE 'S' 1,EPS,MIDDLE 1),(MIDDLE 1,NT 'A',MIDDLE 2),(MIDDLE 2,T 'c',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'S')],[(INIT 'S',EPS,CHOICE 'S' 2),(CHOICE 'S' 2,EPS,MIDDLE 1),(MIDDLE 1,NT 'A',MIDDLE 2),(MIDDLE 2,T 'd',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'S')]]),('A',[[(INIT 'A',EPS,CHOICE 'A' 1),(CHOICE 'A' 1,EPS,MIDDLE 1),(MIDDLE 1,T 'a',MIDDLE 2),(MIDDLE 2,NT 'A',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'A')],[(INIT 'A',EPS,CHOICE 'A' 2),(CHOICE 'A' 2,EPS,MIDDLE 1),(MIDDLE 1,T 'b',MIDDLE 2),(MIDDLE 2,EPS,FINAL 'A')]])]
2 |
--------------------------------------------------------------------------------
/test/GrammarReader/sample_grammars/sample_bnf_grammar.txt:
--------------------------------------------------------------------------------
1 | S ::= Ac | Ad
2 | A ::= aA | b
3 |
4 |
--------------------------------------------------------------------------------
/test/allstar/AllStarTests.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE TypeFamilies #-}
2 |
3 | module AllStarTests where
4 |
5 | --import Test.HUnit
6 | import Text.ANTLR.Allstar.ParserGenerator
7 | import qualified Data.Set as DS
8 | import Text.ANTLR.Parser (HasEOF(..))
9 | import Text.ANTLR.Grammar (Ref(..))
10 |
11 | import Test.Framework
12 | import Test.Framework.Providers.HUnit
13 | import Text.ANTLR.HUnit
14 | import Text.ANTLR.Pretty
15 |
16 | --------------------------------TESTING-----------------------------------------
17 |
18 | {- instance Token Char where
19 | type Label Char = Char
20 | type Literal Char = Char
21 | getLabel c = c
22 | getLiteral c = c
23 |
24 | instance Token (a, b) where
25 | type Label (a, b) = a
26 | type Literal (a, b) = b
27 | getLabel (a, b) = a
28 | getLiteral (a, b) = b -}
29 |
30 | instance (Show a, Show b) => Prettify (Either a b) where prettify = rshow
31 |
32 | instance Ref Char where
33 | type Sym Char = Char
34 | getSymbol = id
35 |
36 | instance HasEOF Char where
37 | type StripEOF Char = Char
38 | isEOF c = False
39 | stripEOF c = Just c
40 |
41 | dumbTokenizer [] = []
42 | dumbTokenizer (t:ts) = [(t,ts)]
43 |
44 | atnEnv = DS.fromList [ -- First path through the 'S' ATN
45 | (Init 'S', GS EPS, Middle 'S' 0 0),
46 | (Middle 'S' 0 0, GS (NT 'A'), Middle 'S' 0 1),
47 | (Middle 'S' 0 1, GS (T 'c'), Middle 'S' 0 2),
48 | (Middle 'S' 0 2, GS EPS, Final 'S'),
49 |
50 | -- Second path through the 'S' ATN
51 | (Init 'S', GS EPS, Middle 'S' 1 0),
52 | (Middle 'S' 1 0, GS (NT 'A'), Middle 'S' 1 1),
53 | (Middle 'S' 1 1, GS (T 'd'), Middle 'S' 1 2),
54 | (Middle 'S' 1 2, GS EPS, Final 'S'),
55 |
56 | -- First path through the 'A' ATN
57 | (Init 'A', GS EPS, Middle 'A' 0 0),
58 | (Middle 'A' 0 0, GS (T 'a'), Middle 'A' 0 1),
59 | (Middle 'A' 0 1, GS (NT 'A'), Middle 'A' 0 2),
60 | (Middle 'A' 0 2, GS EPS, Final 'A'),
61 |
62 | -- Second path through the 'A' ATN
63 | (Init 'A', GS EPS, Middle 'A' 1 0),
64 | (Middle 'A' 1 0, GS (T 'b'), Middle 'A' 1 1),
65 | (Middle 'A' 1 1, GS EPS, Final 'A')]
66 |
67 |
68 | -- For now, I'm only checking whether the input was accepted--not checking the derivation.
69 |
70 | -- Example from the manual trace of ALL(*)'s execution
71 | parseTest1 = ((@=?) --"for parse dumbTokenizer [a, b, c],"
72 | (Right (Node 'S' [NT 'A', T 'c']
73 | [Node 'A' [T 'a', NT 'A']
74 | [Leaf 'a',
75 | Node 'A' [T 'b']
76 | [Leaf 'b']],
77 | Leaf 'c']))
78 | (parse dumbTokenizer ['a', 'b', 'c'] (NT 'S') atnEnv True))
79 |
80 | -- Example #1 from the ALL(*) paper
81 | parseTest2 = ((@=?) --"for parse dumbTokenizer [b, c],"
82 | (Right (Node 'S' [NT 'A', T 'c']
83 | [Node 'A' [T 'b']
84 | [Leaf 'b'],
85 | Leaf 'c']))
86 | (parse dumbTokenizer ['b', 'c'] (NT 'S') atnEnv True))
87 |
88 | -- Example #2 from the ALL(*) paper
89 | parseTest3 = ((@=?) --"for parse dumbTokenizer [b, d],"
90 | (Right (Node 'S' [NT 'A', T 'd']
91 | [Node 'A' [T 'b']
92 | [Leaf 'b'],
93 | Leaf 'd']))
94 | (parse dumbTokenizer ['b', 'd'] (NT 'S') atnEnv True))
95 |
96 | -- Input that requires more recursive traversals of the A ATN
97 | parseTest4 = ((@=?) --"for parse dumbTokenizer [a a a b c],"
98 | (Right (Node 'S' [NT 'A', T 'c']
99 | [Node 'A' [T 'a', NT 'A']
100 | [Leaf 'a',
101 | Node 'A' [T 'a', NT 'A']
102 | [Leaf 'a',
103 | Node 'A' [T 'a', NT 'A']
104 | [Leaf 'a',
105 | Node 'A' [T 'b']
106 | [Leaf 'b']]]],
107 | Leaf 'c']))
108 | (parse dumbTokenizer ['a', 'a', 'a', 'b', 'c'] (NT 'S') atnEnv True))
109 |
110 | -- Make sure that the result of parsing an out-of-language string has a Left tag.
111 | parseTest5 = ((@=?) --"for parse dumbTokenizer [a b a c],"
112 | True
113 | (let parseResult = parse dumbTokenizer ['a', 'b', 'a', 'c'] (NT 'S') atnEnv True
114 | isLeft pr = case pr of
115 | Left _ -> True
116 | _ -> False
117 | in isLeft parseResult))
118 |
119 | -- To do: Update these tests so that they use the new ATN state representation.
120 | {-
121 |
122 | conflictsTest = ((@=?) --"for getConflictSetsPerLoc()"
123 |
124 | ([[(MIDDLE 5, 1, []), (MIDDLE 5, 2, []),(MIDDLE 5, 3, [])],
125 | [(MIDDLE 5, 1, [MIDDLE 1]), (MIDDLE 5, 2, [MIDDLE 1])],
126 | [(MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]] :: [[ATNConfig Char]])
127 |
128 | (getConflictSetsPerLoc (D [(MIDDLE 5, 1, []),
129 | (MIDDLE 5, 2, []),
130 | (MIDDLE 5, 3, []),
131 | (MIDDLE 5, 1, [MIDDLE 1]),
132 | (MIDDLE 5, 2, [MIDDLE 1]),
133 | (MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])])))
134 |
135 | prodsTest = ((@=?) --"for getProdSetsPerState()"
136 |
137 | ([[(MIDDLE 5, 1, []),
138 | (MIDDLE 5, 2, []),
139 | (MIDDLE 5, 3, []),
140 | (MIDDLE 5, 1, [MIDDLE 1]),
141 | (MIDDLE 5, 2, [MIDDLE 1])],
142 |
143 | [(MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]] :: [[ATNConfig Char]])
144 |
145 | (getProdSetsPerState (D [(MIDDLE 5, 1, []),
146 | (MIDDLE 5, 2, []),
147 | (MIDDLE 5, 3, []),
148 | (MIDDLE 5, 1, [MIDDLE 1]),
149 | (MIDDLE 5, 2, [MIDDLE 1]),
150 | (MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])])))
151 |
152 | -}
153 |
154 |
155 | ambigATNEnv = DS.fromList [(Init 'S', GS EPS, Middle 'S' 0 0),
156 | (Middle 'S' 0 0, GS (T 'a'), Middle 'S' 0 1),
157 | (Middle 'S' 0 1, GS EPS, Final 'S'),
158 |
159 | (Init 'S', GS EPS, Middle 'S' 1 0),
160 | (Middle 'S' 1 0, GS (T 'a'), Middle 'S' 1 1),
161 | (Middle 'S' 1 1, GS EPS, Final 'S'),
162 |
163 | (Init 'S', GS EPS, Middle 'S' 2 0),
164 | (Middle 'S' 2 0, GS (T 'a'), Middle 'S' 2 1),
165 | (Middle 'S' 2 1, GS (T 'b'), Middle 'S' 2 2),
166 | (Middle 'S' 2 2, GS EPS, Final 'S')]
167 |
168 | ambigParseTest1 = ((@=?) --"for parse dumbTokenizer [a],"
169 | True
170 | (let parseResult = parse dumbTokenizer ['a'] (NT 'S') ambigATNEnv True
171 | isLeft pr = case pr of
172 | Left _ -> True
173 | _ -> False
174 | in isLeft parseResult))
175 |
176 | ambigParseTest2 = ((@=?) --"for parse dumbTokenizer [a b],"
177 | (Right (Node 'S' [T 'a', T 'b']
178 | [Leaf 'a',
179 | Leaf 'b']))
180 | (parse dumbTokenizer ['a', 'b'] (NT 'S') ambigATNEnv True))
181 |
182 |
183 | tests = [testCase "parseTest1" parseTest1,
184 | testCase "parseTest2" parseTest2,
185 | testCase "parseTest3" parseTest3,
186 | testCase "parseTest4" parseTest4,
187 | testCase "parseTest5" parseTest5,
188 |
189 | --testCase "conflictsTest" conflictsTest,
190 | --testCase "prodsTest" prodsTest,
191 |
192 | testCase "ambigParseTest1" ambigParseTest1,
193 | testCase "ambigParseTest2" ambigParseTest2]
194 |
195 | --main = runTestTT (TestList tests)
196 |
--------------------------------------------------------------------------------
/test/allstar/ConvertDFA.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-}
4 | module ConvertDFA where
5 | import Language.ANTLR4
6 |
7 | mkInt :: String -> Int
8 | mkInt _ = 3
9 |
10 | data ConvertIt =
11 | Start String
12 | | End Int
13 | deriving (Eq, Ord, Show)
14 |
15 | $( return [] )
16 |
17 | [g4|
18 | grammar Convert;
19 |
20 | root : 'START' LexemeA -> Start
21 | | 'END' LexemeB -> End
22 | ;
23 |
24 | LexemeA : 'abc' -> String ;
25 | LexemeB : 'efg' -> mkInt ;
26 | |]
27 |
28 |
--------------------------------------------------------------------------------
/test/allstar/ConvertP.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-}
4 | module ConvertP
5 | ( module ConvertDFA
6 | , module ConvertP
7 | ) where
8 | import Language.ANTLR4
9 | import ConvertDFA
10 |
11 | $(g4_parsers convertAST convertGrammar)
12 |
13 |
--------------------------------------------------------------------------------
/test/allstar/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 |
3 | import System.IO.Unsafe (unsafePerformIO)
4 | import Data.Monoid
5 | import Test.Framework
6 | import Test.Framework.Providers.HUnit
7 | import Test.Framework.Providers.QuickCheck2
8 | import Test.HUnit
9 | import Test.QuickCheck (Property, quickCheck, (==>))
10 | import qualified Test.QuickCheck.Monadic as TQM
11 |
12 | import qualified AllStarTests
13 | import qualified ConvertP
14 | import ConvertP (ConvertIt(..))
15 |
16 | convertP_simple =
17 | case ConvertP.allstarParse (const False) "STARTabc" of
18 | (Right ast) -> ConvertP.ast2root ast @=? (Start "abc")
19 | (Left err) -> assertFailure err
20 |
21 | convertP_simple2 =
22 | case ConvertP.allstarParse (const False) "ENDefg" of
23 | (Right ast) -> ConvertP.ast2root ast @=? (End 3)
24 | (Left err) -> assertFailure err
25 |
26 | tests =
27 | [ testCase "convertP_simple" convertP_simple
28 | , testCase "convertP_simple2" convertP_simple2
29 | ]
30 |
31 | main :: IO ()
32 | main = defaultMainWithOpts
33 | (AllStarTests.tests ++ tests)
34 | mempty
35 |
36 |
--------------------------------------------------------------------------------
/test/allstar/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Sample grammar for ALL(\*)
3 |
4 | ```
5 | S -> Ac | Ad
6 |
7 | A -> aA | b
8 | ```
9 |
10 | ### ALL(\*) Input/output examples
11 |
12 | ```haskell
13 | *Test.AllStarTests> parse ['a', 'b', 'c'] (NT 'S') atnEnv
14 | (Just True, Node 'S' [Node 'A' [Leaf 'a', Node 'A' [Leaf 'b']], Leaf 'c'])
15 | ```
16 |
17 | ```haskell
18 | *Test.AllStarTests> parse ['b', 'd'] (NT 'S') atnEnv
19 | (Just True, Node 'S' [Node 'A' [Leaf 'b'], Leaf 'd'])
20 | ```
21 |
22 | ```haskell
23 | *Test.AllStarTests> parse ['a', 'a', 'a', 'a', 'b', 'c'] (NT 'S') atnEnv
24 | (Just True, Node 'S' [Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Nod
25 | ```
26 |
27 |
--------------------------------------------------------------------------------
/test/atn/ATN.hs:
--------------------------------------------------------------------------------
1 | module ATN where
2 |
3 | import Text.ANTLR.Grammar
4 | import Text.ANTLR.Allstar.ATN
5 | import Example.Grammar
6 | import Text.ANTLR.Set (fromList, union)
7 | import System.IO.Unsafe (unsafePerformIO)
8 |
9 | -- This is the Grammar from page 6 of the
10 | -- 'Adaptive LL(*) Parsing: The Power of Dynamic Analysis'
11 | -- paper, namely the expected transitions based on Figures
12 | -- 5 through 8:
13 | paperATNGrammar = (defaultGrammar "C" :: Grammar () String String ())
14 | { ns = fromList ["S", "A"]
15 | , ts = fromList ["a", "b", "c", "d"]
16 | , s0 = "C"
17 | , ps =
18 | [ production "S" $ Prod Pass [NT "A", T "c"]
19 | , production "S" $ Prod Pass [NT "A", T "d"]
20 | , production "A" $ Prod Pass [T "a", NT "A"]
21 | , production "A" $ Prod Pass [T "b"]
22 | ]
23 | }
24 |
25 | s i = ("S", i)
26 |
27 | -- Names as shown in paper:
28 | pS = Start "S"
29 | pS1 = Middle "S" 0 0
30 | p1 = Middle "S" 0 1
31 | p2 = Middle "S" 0 2
32 | pS2 = Middle "S" 1 0
33 | p3 = Middle "S" 1 1
34 | p4 = Middle "S" 1 2
35 | pS' = Accept "S"
36 |
37 | pA = Start "A"
38 | pA1 = Middle "A" 2 0
39 | p5 = Middle "A" 2 1
40 | p6 = Middle "A" 2 2
41 | pA2 = Middle "A" 3 0
42 | p7 = Middle "A" 3 1
43 | pA' = Accept "A"
44 |
45 | exp_paperATN = ATN
46 | { _Δ = fromList
47 | -- Submachine for S:
48 | [ (pS, Epsilon, pS1)
49 | , (pS1, NTE "A", p1)
50 | , (p1, TE "c", p2)
51 | , (p2, Epsilon, pS')
52 | , (pS, Epsilon, pS2)
53 | , (pS2, NTE "A", p3)
54 | , (p3, TE "d", p4)
55 | , (p4, Epsilon, pS')
56 | -- Submachine for A:
57 | , (pA, Epsilon, pA1)
58 | , (pA1, TE "a", p5)
59 | , (p5, NTE "A", p6)
60 | , (p6, Epsilon, pA')
61 | , (pA, Epsilon, pA2)
62 | , (pA2, TE "b", p7)
63 | , (p7, Epsilon, pA')
64 | ]
65 | }
66 |
67 | --always _ = True
68 | --never _ = False
69 |
70 | addPredicates = paperATNGrammar
71 | { ps =
72 | ps paperATNGrammar ++
73 | [ production "A" $ Prod (Sem (Predicate "always" ())) [T "a"]
74 | , production "A" $ Prod (Sem (Predicate "never" ())) []
75 | , production "A" $ Prod (Sem (Predicate "always2" ())) [NT "A", T "a"]
76 | ]
77 | }
78 |
79 | (pX,pY,pZ) = (Start "A", Start "A", Start "A")
80 | pX1 = Middle "A" 4 2
81 | pX2 = Middle "A" 4 0
82 | pX3 = Middle "A" 4 1
83 | pX4 = Accept "A"
84 |
85 | pY1 = Middle "A" 5 1
86 | pY2 = Middle "A" 5 0
87 | pY3 = Accept "A"
88 |
89 | pZ1 = Middle "A" 6 3
90 | pZ2 = Middle "A" 6 0
91 | pZ3 = Middle "A" 6 1
92 | pZ4 = Middle "A" 6 2
93 | pZ5 = Accept "A"
94 |
95 | exp_addPredicates = ATN
96 | { _Δ = union (_Δ exp_paperATN) $ fromList
97 | [ (pX, Epsilon, pX1)
98 | , (pX1, PE $ Predicate "always" (), pX2)
99 | , (pX2, TE "a", pX3)
100 | , (pX3, Epsilon, pX4)
101 |
102 | , (pY, Epsilon, pY1)
103 | , (pY1, PE $ Predicate "never" (), pY2)
104 | , (pY2, Epsilon, pY3)
105 |
106 | , (pZ, Epsilon, pZ1)
107 | , (pZ1, PE $ Predicate "always2" (), pZ2)
108 | , (pZ2, NTE "A", pZ3)
109 | , (pZ3, TE "a", pZ4)
110 | , (pZ4, Epsilon, pZ5)
111 | ]
112 | }
113 |
114 | fireZeMissiles state = seq
115 | (unsafePerformIO $ putStrLn "Missiles fired.")
116 | undefined
117 |
118 | addMutators = addPredicates
119 | { ps = ps addPredicates ++
120 | [ production "A" $ Prod (Action (Mutator "fireZeMissiles" ())) []
121 | , production "S" $ Prod (Action (Mutator "identity" ())) []
122 | ]
123 | }
124 |
125 | exp_addMutators = ATN
126 | { _Δ = union (_Δ exp_addPredicates) $ fromList
127 | [ (Start "A", Epsilon, Middle "A" 7 0)
128 | , (Middle "A" 7 0, ME $ Mutator "fireZeMissiles" (), Accept "A")
129 | , (Start "S", Epsilon, Middle "S" 8 0)
130 | , (Middle "S" 8 0, ME $ Mutator "identity" (), Accept "S")
131 | ]
132 | }
133 |
134 |
135 |
--------------------------------------------------------------------------------
/test/atn/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 | import Text.ANTLR.Allstar.ATN
3 | import ATN
4 | import Text.ANTLR.Set (fromList, (\\))
5 |
6 | import System.IO.Unsafe (unsafePerformIO)
7 | import Data.Monoid
8 | import Test.Framework
9 | import Test.Framework.Providers.HUnit
10 | import Test.Framework.Providers.QuickCheck2
11 | import Test.HUnit
12 | import Test.QuickCheck (Property, quickCheck, (==>))
13 | import qualified Test.QuickCheck.Monadic as TQM
14 |
15 | -- ATNs should be same:
16 | test_paperATNGrammar =
17 | atnOf paperATNGrammar
18 | @?=
19 | exp_paperATN
20 |
21 | -- Set difference to make debugging easier:
22 | test_paperATNGrammar2 =
23 | ((_Δ . atnOf) paperATNGrammar \\ _Δ exp_paperATN)
24 | @?=
25 | fromList []
26 |
27 | test_addPredicates =
28 | atnOf addPredicates
29 | @?=
30 | exp_addPredicates
31 |
32 | test_addPredicates2 =
33 | ((_Δ . atnOf) addPredicates \\ _Δ exp_addPredicates)
34 | @?=
35 | fromList []
36 |
37 | test_addMutators =
38 | atnOf addMutators
39 | @?=
40 | exp_addMutators
41 |
42 | main :: IO ()
43 | main = defaultMainWithOpts
44 | [ testCase "paper_ATN_Grammar" test_paperATNGrammar
45 | , testCase "paper_ATN_Grammar2" test_paperATNGrammar2
46 | , testCase "paper_ATN_Predicates2" test_addPredicates2
47 | , testCase "paper_ATN_Predicates" test_addPredicates
48 | , testCase "paper_ATN_Mutators" test_addMutators
49 | ] mempty
50 |
51 |
--------------------------------------------------------------------------------
/test/c/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 | import Language.ANTLR4
3 | import CParser
4 |
5 | main :: IO ()
6 | main = print "done"
7 |
8 |
--------------------------------------------------------------------------------
/test/chisel/Language/Chisel/Grammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances #-}
4 | module Language.Chisel.Grammar
5 | ( ChiselNTSymbol(..), ChiselTSymbol(..), ChiselAST
6 | , lowerID, upperID, prim, int, arrow, lparen, rparen, pound
7 | , vertbar, colon, comma, atsymbol, carrot, dot, linecomm, ws
8 | , Primitive(..), chiselGrammar, TokenValue(..)
9 | , chiselAST, TokenName(..), chiselDFAs, lexeme2value, isWhitespace
10 | , ChiselToken(..), list, cons, append
11 | ) where
12 | import Language.ANTLR4
13 | import Language.Chisel.Syntax as S
14 |
15 | list a = [a]
16 | cons = (:)
17 | append = (++)
18 |
19 | [g4|
20 | grammar Chisel;
21 | chiselProd : prodSimple
22 | | '(' prodSimple ')'
23 | ;
24 |
25 | prodSimple : prodID formals magnitude alignment '->' group -> S.prodFMA
26 | | prodID formals '->' group -> S.prodF
27 | | prodID magnitude alignment '->' group -> S.prodMA
28 | | prodID magnitude '->' group -> S.prodM
29 | | LowerID prodID magnitude alignment '->' group -> S.prodNMA
30 | ;
31 |
32 | formals : LowerID formals -> cons
33 | | LowerID -> list
34 | ;
35 |
36 | magnitude : '|' '#' sizeArith '|' -> magWild
37 | | '|' sizeArith '|' -> magNorm
38 | | '|' prodID '|' -> magID
39 | ;
40 |
41 | alignment : '@' '(' sizeArith ')';
42 |
43 | group : groupExp1 -> list
44 | | '(' groupExp ')'
45 | ;
46 |
47 | groupExp : groupExp1 -> list
48 | | groupExp1 ',' groupExp -> cons
49 | ;
50 |
51 | groupExp1 : '#' chiselProd -> gProdWild
52 | | '#' sizeArith -> gSizeWild
53 | | '(' flags ')' -> GFlags
54 | | chiselProd -> gProdNorm
55 | | sizeArith -> gSizeNorm
56 | | label -> GLabel
57 | | arith chiselProd -> gProdArith
58 | | arith prodApp -> GProdApp
59 | | '(' labels ')' -> GLabels
60 | ;
61 |
62 | flags : prodID -> list
63 | | prodID '|' flags -> cons
64 | ;
65 |
66 | labels : label -> list
67 | | label '|' labels -> cons
68 | ;
69 |
70 | label : LowerID ':' labelExp -> Label
71 | ;
72 |
73 | labelExp : '#' chiselProd -> lProdWild
74 | | '#' prodApp -> lProdAppWild
75 | | '#' sizeArith -> lSizeWild
76 | | chiselProd -> lProd
77 | | prodApp -> lProdApp
78 | | sizeArith -> lSize
79 | ;
80 |
81 | prodApp : prodID prodApp -> cons
82 | | prodID -> list
83 | ;
84 |
85 | sizeArith : arith Prim -> SizeArith
86 | | Prim -> singleArith
87 | ;
88 |
89 | arith : INT -> SizeInt
90 | | LowerID -> SizeID
91 | | arith '^' arith -> SizeExp
92 | ;
93 |
94 | prodID : UpperID -> id
95 | | UpperID '.' prodID -> append
96 | ;
97 |
98 | Prim : ( 'bit' | 'byte' ) 's'? -> Primitive;
99 | ArchPrim : ( 'page' | 'word' ) 's'? -> Primitive;
100 | UpperID : [A-Z][a-zA-Z0-9_]* -> String;
101 | LowerID : [a-z][a-zA-Z0-9_]* -> String;
102 | INT : [0-9]+ -> Int;
103 | LineComment : '//' (~ '\n')* '\n' -> String;
104 | WS : [ \t\n\r\f\v]+ -> String;
105 | |]
106 |
107 | -- Types used to the right of the '->' directive must instance Read
108 |
109 | isWhitespace T_LineComment = True
110 | isWhitespace T_WS = True
111 | isWhitespace _ = False
112 |
113 | {- Helper functions to construct all the various Tokens from either the desired
114 | - (arbitrary) lexeme or by looking it up based on the static lexeme it always
115 | - matches. -}
116 | lowerID x = Token T_LowerID (V_LowerID x) (length x)
117 | upperID x = Token T_UpperID (V_UpperID x) (length x)
118 | prim x = Token T_Prim (V_Prim x) (length $ show x)
119 | int x = Token T_INT (V_INT x) (length $ show x)
120 | arrow = lookupToken "->"
121 | lparen = lookupToken "("
122 | rparen = lookupToken ")"
123 | pound = lookupToken "#"
124 | vertbar = lookupToken "|"
125 | colon = lookupToken ":"
126 | comma = lookupToken ","
127 | atsymbol = lookupToken "@"
128 | carrot = lookupToken "^"
129 | dot = lookupToken "."
130 | linecomm x = Token T_LineComment (V_LineComment x) (length x)
131 | ws x = Token T_WS (V_WS x) (length x)
132 |
133 |
--------------------------------------------------------------------------------
/test/chisel/Language/Chisel/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-}
4 | module Language.Chisel.Parser
5 | ( module Language.Chisel.Grammar
6 | , Language.Chisel.Parser.tokenize, glrParseFast
7 | , parse, glrParse
8 | ) where
9 | import Language.ANTLR4
10 | import Language.Chisel.Syntax as S
11 | import Language.Chisel.Grammar
12 |
13 | $(g4_parsers chiselAST chiselGrammar)
14 |
15 | $(mkLRParser chiselAST chiselGrammar)
16 |
17 | parse = glrParse isWhitespace
18 |
19 |
20 |
--------------------------------------------------------------------------------
/test/chisel/Language/Chisel/Syntax.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric #-}
2 | module Language.Chisel.Syntax where
3 | import Language.ANTLR4 (Data(..))
4 | import Text.ANTLR.Pretty
5 | import Text.ANTLR.Set (Hashable(..), Generic(..))
6 |
7 | data ChiselProd = ChiselProd
8 | { prodID :: UpperID
9 | , count :: Maybe LowerID
10 | , formals :: Maybe [Formal]
11 | , magnitude :: Maybe Magnitude
12 | , alignment :: Maybe SizeArith
13 | , rhs :: Group
14 | }
15 |
16 | prodFMA s f m a = ChiselProd s Nothing (Just f) (Just m) (Just a)
17 | prodF s f = ChiselProd s Nothing (Just f) Nothing Nothing
18 | prodMA s m a = ChiselProd s Nothing Nothing (Just m) (Just a)
19 | prodM s m = ChiselProd s Nothing Nothing (Just m) Nothing
20 | prodNMA s n m a = ChiselProd s (Just n) Nothing (Just m) (Just a)
21 |
22 | type Formal = String
23 |
24 | -- Whether or not '#' was used:
25 | type Wild = Bool
26 |
27 | data Magnitude = Mag Wild SizeArith
28 |
29 | magWild = Mag True -- Variable magnitude
30 | magNorm = Mag False -- Fixed magnitude
31 | magID = Mag False . SizeID
32 |
33 | type Alignment = SizeArith
34 |
35 | type Group = [GroupExp]
36 |
37 | -- Left False == no size annotation
38 | -- Left True == '#' annotation
39 | -- Right sz == fixed size annotation
40 | type Count = Either Wild SizeArith
41 |
42 | data GroupExp =
43 | GProd Count ChiselProd
44 | | GSize Wild SizeArith
45 | | GFlags [Flag]
46 | | GLabel Label
47 | | GLabels [Label]
48 | | GProdApp SizeArith ProdApp
49 |
50 | gProdWild = GProd (Left True)
51 | gProdNorm = GProd (Left False)
52 | gProdArith a = GProd (Right a)
53 |
54 | gSizeWild = GSize True
55 | gSizeNorm = GSize False
56 |
57 | type Flag = ProdID
58 |
59 | type ProdApp = [ProdID]
60 |
61 | type UpperID = String
62 | type LowerID = String
63 | type ProdID = String
64 | type LabelID = String
65 |
66 | data Label = Label LabelID LabelExp
67 |
68 | data LabelExp =
69 | LProd Wild ChiselProd
70 | | LProdApp Wild ProdApp
71 | | LSize Wild SizeArith
72 |
73 | lProdWild = LProd True
74 | lProd = LProd False
75 |
76 | lProdAppWild = LProdApp True
77 | lProdApp = LProdApp False
78 |
79 | lSizeWild = LSize True
80 | lSize = LSize False
81 |
82 | data SizeArith =
83 | SizeInt Int
84 | | SizeID LowerID
85 | | SizeExp SizeArith SizeArith
86 | | SizeArith SizeArith Primitive
87 |
88 | singleArith = SizeArith (SizeInt 1)
89 |
90 | data Primitive = Page | Word | Byte | Bit
91 | deriving (Show, Eq, Ord, Generic, Hashable, Data)
92 |
93 | lexeme2prim "page" = Just Page
94 | lexeme2prim "pages" = Just Page
95 | lexeme2prim "word" = Just Word
96 | lexeme2prim "words" = Just Word
97 | lexeme2prim "byte" = Just Byte
98 | lexeme2prim "bytes" = Just Byte
99 | lexeme2prim "bit" = Just Bit
100 | lexeme2prim "bits" = Just Bit
101 | lexeme2prim _ = Nothing
102 |
103 | instance Read Primitive where
104 | readsPrec _ input = case lexeme2prim input of
105 | Just x -> [(x,"")]
106 | Nothing -> []
107 |
108 | instance Prettify Primitive where prettify = rshow
109 |
110 |
--------------------------------------------------------------------------------
/test/chisel/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables #-}
2 | module Main where
3 | -- Project imports go here, e.g.:
4 | --import Language.Chisel.Tokenizer
5 | import Text.ANTLR.Lex.Tokenizer (Token(..))
6 | import Text.ANTLR.Parser (AST(..))
7 | import Language.Chisel.Parser
8 | import Language.Chisel.Syntax
9 | import Text.ANTLR.Grammar (Grammar(..), ProdElem(..))
10 | import Language.ANTLR4.FileOpener (open)
11 | import Language.ANTLR4.Boot.Syntax (Directive(..))
12 |
13 | import System.IO.Unsafe (unsafePerformIO)
14 | import Data.Monoid
15 | import Test.Framework
16 | import Test.Framework.Providers.HUnit
17 | import Test.Framework.Providers.QuickCheck2
18 | import Test.HUnit hiding ((@?=), assertEqual)
19 | import Test.QuickCheck (Property, quickCheck, (==>))
20 | import qualified Test.QuickCheck.Monadic as TQM
21 |
22 | import Text.ANTLR.HUnit
23 | import Debug.Trace as D
24 | import qualified Text.ANTLR.LR as LR
25 | import Text.ANTLR.Pretty (pshow)
26 | import qualified Data.Text as T
27 |
28 | chi = id
29 |
30 | ghc_val = [open| test/chisel/Language/Chisel/Examples/GHC.chi |]
31 | tokenizeGHC_val = tokenize ghc_val
32 |
33 | tokenizeGHC_exp =
34 | [ upperID "Heap", ws " ", lowerID "m", ws " ", lowerID "k", ws " ", arrow
35 | , ws "\n ", pound, ws " ", upperID "MegaBlock", ws " "
36 | , vertbar, int 2, carrot, lowerID "m", ws " ", prim Byte, vertbar, ws " "
37 | , atsymbol, lparen, int 2, carrot, lowerID "m", ws " ", prim Byte, rparen
38 | , ws " ", arrow, ws "\n ", linecomm "// -------Megablock \"Header\"------------------"
39 | , ws "\n ", lparen, ws " ", upperID "Descrs", ws " "
40 | , vertbar, int 2, carrot, lowerID "k", ws " ", prim Byte, vertbar, ws " "
41 | , arrow, ws "\n ", lparen, ws " ", lowerID "padMB", ws " ", prim Byte
42 | , ws "\n ", linecomm "// +++++++++++++++++++++++++++++++++++++++++\n"
43 | , ws " ", comma, ws " ", lowerID "bds", ws " "
44 | , colon, ws " ", lowerID "n", ws " ", upperID "BlockDescr", ws " "
45 | , vertbar, int 2, carrot, lowerID "d", ws " ", prim Byte, vertbar
46 | , ws " ", atsymbol, lparen, int 2, carrot, lowerID "k", ws " ", prim Byte, rparen
47 | , ws " ", arrow, ws "\n "
48 | , lparen, ws " ", lowerID "start", ws " ", colon, ws " "
49 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n "
50 | , comma, ws " ", lowerID "free", ws " ", colon, ws " "
51 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n "
52 | , comma, ws " ", lowerID "link", ws " ", colon, ws " "
53 | , upperID "Ptr", ws " ", upperID "BlockDescr", ws "\n "
54 | , comma, ws " ", lparen, ws " ", lowerID "back", ws " ", colon, ws " "
55 | , upperID "Ptr", ws " ", upperID "BlockDescr", ws "\n "
56 | , vertbar, ws " ", lowerID "bitmap", ws " ", colon, ws " "
57 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n "
58 | , vertbar, ws " ", lowerID "scan", ws " ", colon, ws " "
59 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", rparen, ws "\n "
60 | , comma, ws " ", lowerID "gen", ws " ", colon, ws " "
61 | , upperID "Ptr", ws " ", upperID "Generation", ws "\n ", comma, ws " "
62 | , lowerID "gen_no", ws " ", colon, ws " "
63 | , upperID "Stg", dot, upperID "Word16", ws "\n "
64 | , comma, ws " ", lowerID "dest_no", ws " ", colon, ws " "
65 | , upperID "Stg", dot, upperID "Word16", ws "\n "
66 | , comma, ws " ", lowerID "node", ws " ", colon, ws " "
67 | , upperID "Stg", dot, upperID "Word16", ws "\n "
68 | , comma, ws " ", upperID "Flags", ws " "
69 | , vertbar, upperID "Stg", dot, upperID "Word16", vertbar, ws " ", arrow, ws "\n "
70 | , lparen, ws " ", upperID "LARGE", ws " ", vertbar, ws " "
71 | , upperID "EVACUATED", ws "", vertbar, ws " "
72 | , upperID "FREE", ws "\n ", vertbar, ws " "
73 | , upperID "PINNED", ws " ", vertbar, ws " "
74 | , upperID "MARKED", ws " ", vertbar, ws " "
75 | , upperID "KNOWN", ws "\n ", vertbar, ws " "
76 | , upperID "EXEC", ws " ", vertbar, ws " "
77 | , upperID "FRAGMENTED", ws " ", vertbar, ws " "
78 | , upperID "SWEPT", ws "\n ", vertbar, ws " "
79 | , upperID "COMPACT", ws " ", rparen, ws "\n ", comma, ws " "
80 | , lowerID "n_blocks", ws " ", colon, ws " "
81 | , upperID "Stg", dot, upperID "Word32", ws "\n "
82 | , comma, ws " ", lowerID "padD", ws " ", prim Byte, rparen, rparen, ws "\n "
83 | , linecomm "// +++++++++++++++++++++++++++++++++++++++++\n", ws " "
84 | , linecomm "// -------Megablock payload-------------------\n", ws " ", comma, ws " "
85 | , lowerID "blocks", ws " ", colon, ws " ", lowerID "n", ws " "
86 | , upperID "Block", ws " ", vertbar, int 2, carrot
87 | , lowerID "k", ws " ", prim Byte, vertbar, ws " "
88 | , atsymbol, lparen, int 2, carrot, lowerID "k", ws " ", prim Byte, rparen
89 | , ws " ", arrow, ws "\n ", lparen, ws " "
90 | , lowerID "closures", ws " ", colon, ws " ", pound, ws " "
91 | , upperID "Stg", dot, upperID "Closures", ws "\n ", comma, ws " "
92 | , lowerID "free", ws " ", colon, ws " ", pound, ws " ", prim Byte, rparen
93 | , ws "\n ", rparen, ws "\n", EOF
94 | ]
95 |
96 | tokenizeGHC =
97 | tokenizeGHC_val
98 | @?=
99 | tokenizeGHC_exp
100 |
101 | tokenizeGHC2 =
102 | dropWhile (\(a,b) -> a == b) (zip tokenizeGHC_val tokenizeGHC_exp)
103 | @?=
104 | []
105 |
106 | tokenizeSmall = tokenize "Foo x -> x Bar"
107 |
108 | parseTestSmall =
109 | D.traceShowId (parse "Foo x -> x Bar")
110 | @?=
111 | LR.ResultAccept
112 | ( AST NT_chiselProd [NT NT_prodSimple]
113 | [ AST NT_prodSimple [NT NT_prodID, NT NT_formals, T T_2, NT NT_group]
114 | [ AST NT_prodID [T T_UpperID] [Leaf $ upperID "Foo"]
115 | , AST NT_formals [T T_LowerID]
116 | [ Leaf $ lowerID "x"]
117 | , Leaf arrow
118 | , AST NT_group [NT NT_groupExp1]
119 | [ AST NT_groupExp1 [NT NT_arith, NT NT_prodApp]
120 | [ AST NT_arith [T T_LowerID] [Leaf $ lowerID "x"]
121 | , AST NT_prodApp [NT NT_prodID]
122 | [ AST NT_prodID [T T_UpperID] [Leaf $ upperID "Bar"] ]
123 | ]]]])
124 |
125 | tokenizeSmallTest =
126 | tokenizeSmall
127 | @?=
128 | [upperID "Foo", ws " ", lowerID "x", ws " ", arrow, ws " ", lowerID "x", ws " ", upperID "Bar", EOF]
129 |
130 | parseGHCTestBig =
131 | case parse ghc_val of
132 | (LR.ResultAccept _) -> (1 :: Int) @?= 1
133 | e -> e @?= LR.ResultAccept LeafEps
134 |
135 | testPrettify =
136 | unsafePerformIO (putStr $ T.unpack $ pshow (chiselGrammar :: Grammar () ChiselNTSymbol ChiselTSymbol Directive))
137 | @?= ()
138 |
139 | testFast =
140 | glrParseFast isWhitespace "Foo x -> x Bar"
141 | @?=
142 | glrParse isWhitespace "Foo x -> x Bar"
143 |
144 | main :: IO ()
145 | main = defaultMainWithOpts
146 | [ testCase "Tokenize GHC" tokenizeGHC
147 | , testCase "Tokenize GHC2" tokenizeGHC2
148 | , testCase "Tokenize Small" tokenizeSmallTest
149 | , testCase "Parse Test (Small)" parseTestSmall
150 | , testCase "Parse GHC" parseGHCTestBig
151 | , testCase "Prettify speed" testPrettify
152 | , testCase "Run glrParseFast build" testFast
153 | ] mempty
154 |
155 |
--------------------------------------------------------------------------------
/test/coreg4/G4.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, DeriveDataTypeable #-}
4 | module G4 where
5 | import Language.ANTLR4
6 |
7 | [g4|
8 | grammar G4Basic;
9 | exp : '1'
10 | | '2'
11 | | '3'
12 | ;
13 | |]
14 |
15 |
--------------------------------------------------------------------------------
/test/coreg4/G4Fast.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE TemplateHaskell #-}
2 | module G4Fast where
3 | import Language.ANTLR4
4 | import G4
5 | import G4Parser
6 |
7 | $(mkLRParser g4BasicAST g4BasicGrammar)
8 |
9 | {-
10 | import System.IO.Unsafe (unsafePerformIO)
11 | import Data.Monoid
12 | import Test.Framework
13 | import Test.Framework.Providers.HUnit
14 | import Test.Framework.Providers.QuickCheck2
15 | import Test.HUnit
16 | import Test.QuickCheck (Property, quickCheck, (==>))
17 | import qualified Test.QuickCheck.Monadic as TQM
18 | -}
19 |
20 | {-
21 | import Language.ANTLR4 hiding (tokenize)
22 | import Text.ANTLR.Grammar
23 | import qualified Language.ANTLR4.Example.Optionals as Opt
24 | import Language.ANTLR4.Example.G4 as G4
25 | --import Language.ANTLR4.Example.G4 (g4BasicGrammar, G4BasicNTSymbol, G4BasicTSymbol, G4BasicAST)
26 | import Text.ANTLR.Parser (AST(..))
27 | import qualified Text.ANTLR.LR as LR
28 | import qualified Text.ANTLR.Lex.Tokenizer as T
29 | -}
30 |
31 | {-
32 | import Data.Map.Internal (fromList)
33 | import qualified Text.ANTLR.MultiMap as M
34 | import Text.ANTLR.MultiMap (Map(..))
35 | import qualified Text.ANTLR.Set as S
36 | import qualified Data.HashSet as H
37 | -}
38 |
39 |
--------------------------------------------------------------------------------
/test/coreg4/G4Parser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-}
4 | module G4Parser where
5 | import Language.ANTLR4
6 | import G4
7 |
8 | $(g4_parsers g4BasicAST g4BasicGrammar)
9 |
10 |
--------------------------------------------------------------------------------
/test/coreg4/Hello.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable #-}
4 | module Hello where
5 | import Language.ANTLR4
6 |
7 | [g4|
8 | // Hello World grammar
9 | // https://github.com/antlr/grammars-v4/blob/master/antlr4/examples/Hello.g4
10 | grammar Hello;
11 | r : 'hello' WS ID;
12 | ID : [a-zA-Z]+ -> String;
13 | WS : [ \t\r\n]+ -> String;
14 | |]
15 |
16 |
--------------------------------------------------------------------------------
/test/coreg4/HelloParser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable, FlexibleContexts #-}
4 | module HelloParser where
5 | import Language.ANTLR4
6 | import Hello
7 |
8 | $(g4_parsers helloAST helloGrammar)
9 |
10 |
--------------------------------------------------------------------------------
/test/coreg4/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 |
3 | import System.IO.Unsafe (unsafePerformIO)
4 | import Data.Monoid
5 | import Test.Framework
6 | import Test.Framework.Providers.HUnit
7 | import Test.Framework.Providers.QuickCheck2
8 | import Test.HUnit
9 | import Test.QuickCheck (Property, quickCheck, (==>))
10 | import qualified Test.QuickCheck.Monadic as TQM
11 |
12 | import Language.ANTLR4 hiding (tokenize, Regex(..))
13 | import qualified G4Parser as G4P
14 | import qualified G4 as G4
15 | import Hello
16 | import HelloParser
17 | import Text.ANTLR.Parser (AST(..))
18 | import qualified Text.ANTLR.LR as LR
19 | import Language.ANTLR4.Boot.Syntax (Regex(..))
20 | --import Language.ANTLR4.Regex (parseRegex)
21 |
22 | import qualified Language.ANTLR4.G4 as P -- Parser
23 |
24 | import qualified G4Fast as Fast
25 |
26 | test_g4_basic_type_check = do
27 | let _ = G4.g4BasicGrammar
28 | 1 @?= 1
29 |
30 | hello_g4_test_type_check = do
31 | let _ = helloGrammar
32 | 1 @?= 1
33 |
34 | {-
35 | regex_test = do
36 | parseRegex "[ab]* 'a' 'b' 'b'"
37 | @?= Right
38 | (Concat
39 | [ Kleene $ CharSet "ab"
40 | , Literal "a"
41 | , Literal "b"
42 | , Literal "b"
43 | ])
44 | -}
45 |
46 | _1 = G4.lookupToken "1"
47 |
48 | -- TODO: implement 'read' instance for TokenValue type so that I don't have to
49 | -- hardcode the name for literal terminals (e.g. '1' == T_0 below)
50 | test_g4 =
51 | G4P.slrParse (G4P.tokenize "1")
52 | @?=
53 | LR.ResultAccept (AST G4.NT_exp [T G4.T_0] [Leaf _1])
54 |
55 | test_hello =
56 | slrParse (tokenize "hello Matt")
57 | @?=
58 | (LR.ResultAccept $
59 | AST NT_r [T T_0, T T_WS, T T_ID]
60 | [ Leaf (Token T_0 V_0 1)
61 | , Leaf (Token T_WS (V_WS " ") 1)
62 | , Leaf (Token T_ID (V_ID "Matt") 4)
63 | ]
64 | )
65 |
66 | test_hello_allstar =
67 | allstarParse (const False) ("hello Matt")
68 | @?=
69 | Right (AST NT_r [T T_0, T T_WS, T T_ID] [Leaf (Token T_0 V_0 5),Leaf (Token T_WS (V_WS " ") 1),Leaf
70 | (Token T_ID (V_ID "Matt") 4)])
71 | --Right (AST NT_r [] [])
72 |
73 | testFastGLR =
74 | Fast.glrParseFast (const False) "3"
75 | @?=
76 | G4P.glrParse (const False) "3"
77 |
78 | main :: IO ()
79 | main = defaultMainWithOpts
80 | [ testCase "g4_basic_compilation_type_check" test_g4_basic_type_check
81 | , testCase "hello_parse_type_check" hello_g4_test_type_check
82 | -- , testCase "regex_test" regex_test
83 | , testCase "test_g4" test_g4
84 | , testCase "test_hello" test_hello
85 | , testCase "test_hello_allstar" test_hello_allstar
86 | , testCase "testFastGLR" testFastGLR
87 | ] mempty
88 |
89 |
--------------------------------------------------------------------------------
/test/g4/DoubleSemi.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-}
4 | module DoubleSemi where
5 | import Language.ANTLR4
6 |
7 | $( return [] )
8 |
9 | [g4|
10 | grammar Dbl;
11 | dbl : 'a' | 'f' ; // ;
12 | WS : [ \t\r\n]+ -> String;
13 | |]
14 |
15 | isWS T_WS = True
16 | isWS _ = False
17 |
18 |
--------------------------------------------------------------------------------
/test/g4/DoubleSemiP.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-}
4 | module DoubleSemiP
5 | ( module DoubleSemiP
6 | , module DoubleSemi
7 | ) where
8 | import Language.ANTLR4
9 | import DoubleSemi
10 |
11 | $(g4_parsers dblAST dblGrammar)
12 |
13 |
--------------------------------------------------------------------------------
/test/g4/Empty.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-}
4 | module Empty where
5 | import Language.ANTLR4
6 |
7 | $( return [] )
8 |
9 | [g4|
10 | grammar Empty;
11 | emp : 'a' b | 'f' d ;
12 | b : 'c'
13 | | // empty!
14 | ;
15 |
16 | d : // empty!
17 | | 'd'
18 | ;
19 |
20 | WS : [ \t\r\n]+ -> String;
21 | |]
22 |
23 | isWS T_WS = True
24 | isWS _ = False
25 |
26 |
--------------------------------------------------------------------------------
/test/g4/EmptyP.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-}
4 | module EmptyP
5 | ( module Empty
6 | , module EmptyP
7 | ) where
8 | import Language.ANTLR4
9 | import Empty
10 |
11 | $(g4_parsers emptyAST emptyGrammar)
12 |
13 |
--------------------------------------------------------------------------------
/test/g4/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE TemplateHaskell, FlexibleContexts, TypeFamilies #-}
2 | module Main where
3 |
4 | import System.IO.Unsafe (unsafePerformIO)
5 | import Data.Monoid
6 | import Test.Framework
7 | import Test.Framework.Providers.HUnit
8 | import Test.Framework.Providers.QuickCheck2
9 | import Test.HUnit
10 | import Test.QuickCheck (Property, quickCheck, (==>))
11 | import qualified Test.QuickCheck.Monadic as TQM
12 |
13 | import Language.ANTLR4 hiding (tokenize, Regex(..))
14 | import Text.ANTLR.Grammar
15 | --import Language.ANTLR4.Regex
16 | import Text.ANTLR.Parser (AST(..))
17 | import qualified Text.ANTLR.LR as LR
18 | import qualified Text.ANTLR.Lex.Tokenizer as T
19 |
20 | import qualified Language.ANTLR4.G4 as P -- Parser
21 |
22 | import qualified Optional as Opt
23 | import qualified OptionalParser as Opt
24 |
25 | import qualified EmptyP as E
26 | import qualified Empty as E
27 |
28 | test_optional =
29 | Opt.glrParse Opt.isWS "a"
30 | @?=
31 | (LR.ResultAccept $ AST Opt.NT_r [NT Opt.NT_a] [AST Opt.NT_a [T Opt.T_1] [Leaf (Token Opt.T_1 Opt.V_1 1)]])
32 |
33 | test_optional2 =
34 | case Opt.glrParse Opt.isWS "a a e b c d" of
35 | LR.ResultAccept ast -> Opt.ast2r ast @?= "accept"
36 | err -> error $ show err
37 |
38 | test_optional3 =
39 | case Opt.glrParse Opt.isWS "a e b b b c c d" of
40 | LR.ResultAccept ast -> Opt.ast2r ast @?= "accept"
41 | err -> error $ show err
42 |
43 | test_optional4 =
44 | case Opt.glrParse Opt.isWS "a" of
45 | LR.ResultAccept ast -> Opt.ast2r ast @?= "reject"
46 | err -> error $ show err
47 |
48 | test_e v =
49 | case v of
50 | LR.ResultAccept ast -> E.ast2emp ast @?= ()
51 | err -> error $ show err
52 | test_e_fail v =
53 | case v of
54 | LR.ResultAccept ast -> assertFailure $ show ast
55 | err -> () @?= ()
56 |
57 | test_empty = test_e (E.slrParse (E.tokenize "a"))
58 | test_empty2 = test_e (E.slrParse (E.tokenize "f"))
59 | test_empty3 = test_e (E.slrParse (E.tokenize "ac"))
60 | test_empty4 = test_e (E.slrParse (E.tokenize "fd"))
61 | test_empty5 = test_e_fail (E.slrParse (E.tokenize "fc"))
62 | test_empty6 = test_e_fail (E.slrParse (E.tokenize "ad"))
63 |
64 | main :: IO ()
65 | main = defaultMainWithOpts
66 | [ testCase "test_optional" test_optional
67 | , testCase "test_optional2" test_optional2
68 | , testCase "test_optional3" test_optional3
69 | , testCase "test_optional4" test_optional4
70 | , testCase "test_empty" test_empty
71 | , testCase "test_empty2" test_empty2
72 | , testCase "test_empty3" test_empty3
73 | , testCase "test_empty4" test_empty4
74 | , testCase "test_empty5" test_empty5
75 | , testCase "test_empty6" test_empty6
76 | ] mempty
77 |
78 |
--------------------------------------------------------------------------------
/test/g4/Optional.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-}
4 | module Optional where
5 | import Language.ANTLR4
6 |
7 | opt a b c d = (1, 'b', 2.0, [1,2,3])
8 |
9 | foo :: () -> Maybe (Int, Char, Double, [Int]) -> String
10 | foo a1 (Just (a,b,c,d)) = "accept"
11 | foo a1 Nothing = "reject"
12 |
13 | $( return [] )
14 |
15 | [g4|
16 | grammar Optional;
17 | r : a s? -> foo;
18 | s : a? 'e' b* c+ d -> opt;
19 | a : 'a';
20 | b : 'b';
21 | c : 'c';
22 | d : 'd';
23 |
24 | ID : [a-zA-Z]+ -> String;
25 | WS : [ \t\r\n]+ -> String;
26 | |]
27 |
28 | isWS T_WS = True
29 | isWS _ = False
30 |
31 |
--------------------------------------------------------------------------------
/test/g4/OptionalParser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-}
4 | module OptionalParser where
5 | import Language.ANTLR4
6 | import Optional
7 |
8 | $(g4_parsers optionalAST optionalGrammar)
9 |
10 |
--------------------------------------------------------------------------------
/test/lexer/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 |
3 | import Text.ANTLR.Lex
4 | import Text.ANTLR.Lex.Automata
5 | import Text.ANTLR.Lex.NFA as NFA
6 | import qualified Text.ANTLR.Lex.DFA as DFA
7 |
8 | import Text.ANTLR.Lex.Regex
9 |
10 | import System.IO.Unsafe (unsafePerformIO)
11 | import Data.Monoid
12 | import Test.Framework
13 | import Test.Framework.Providers.HUnit
14 | import Test.Framework.Providers.QuickCheck2
15 | import Test.HUnit
16 | import Test.QuickCheck (Property, quickCheck, (==>))
17 | import qualified Test.QuickCheck.Monadic as TQM
18 |
19 | import Text.ANTLR.Set (fromList, Set(..), singleton, Hashable)
20 |
21 | singleEdge s = (False, singleton s)
22 |
23 | fL :: (Hashable a, Eq a) => [a] -> Set a
24 | fL = fromList
25 |
26 | nfa0 :: NFA Char Int
27 | nfa0 = Automata
28 | { _S = fL [0, 1, 2, 3]
29 | , _Σ = fL "ab"
30 | , s0 = 0
31 | , _F = fL [3]
32 | , _Δ = fL
33 | [ (0, singleEdge $ Edge 'a', 0)
34 | , (0, singleEdge $ Edge 'a', 1)
35 | , (0, singleEdge $ Edge 'b', 0)
36 | , (1, singleEdge $ Edge 'b', 2)
37 | , (2, singleEdge $ Edge 'b', 3)
38 | ]
39 | }
40 |
41 | testValid0 =
42 | ( validStartState nfa0
43 | && validFinalStates nfa0
44 | && validTransitions nfa0
45 | )
46 | @?=
47 | True
48 |
49 | testClosureWith0 =
50 | closureWith (Edge 'a' ==) nfa0 (singleton 0)
51 | @?=
52 | fromList [0, 1]
53 |
54 | testClosureWith1 =
55 | closureWith (NFAEpsilon ==) nfa0 (singleton 0)
56 | @?=
57 | fromList [0]
58 |
59 | testClosureWith2 =
60 | closureWith (const True) nfa0 (singleton 0)
61 | @?=
62 | fromList [0, 1, 2, 3]
63 |
64 | testClosureWith3 =
65 | closureWith (Edge 'b' ==) nfa0 (singleton 0)
66 | @?=
67 | fromList [0]
68 |
69 | testMove0 =
70 | move nfa0 (fromList [0,1,2]) (Edge 'a')
71 | @?=
72 | fromList [0,1]
73 |
74 | nfa334 :: NFA Char Int
75 | nfa334 = Automata
76 | { _S = fL [0 .. 10]
77 | , _Σ = fL "ab"
78 | , s0 = 0
79 | , _F = fL [10]
80 | , _Δ = fL
81 | [ (0, singleEdge NFAEpsilon, 1)
82 | , (0, singleEdge NFAEpsilon, 7)
83 | , (1, singleEdge NFAEpsilon, 2)
84 | , (1, singleEdge NFAEpsilon, 4)
85 | , (2, singleEdge $ Edge 'a', 3)
86 | , (3, singleEdge NFAEpsilon, 6)
87 | , (4, singleEdge $ Edge 'b', 5)
88 | , (5, singleEdge NFAEpsilon, 6)
89 | , (6, singleEdge NFAEpsilon, 1)
90 | , (6, singleEdge NFAEpsilon, 7)
91 | , (7, singleEdge $ Edge 'a', 8)
92 | , (8, singleEdge $ Edge 'b', 9)
93 | , (9, singleEdge $ Edge 'b', 10)
94 | ]
95 | }
96 |
97 | _A = fromList [0,1,2,4,7]
98 | _B = fromList [1,2,3,4,6,7,8]
99 | _C = fromList [1,2,4,5,6,7]
100 | _D = fromList [1,2,4,5,6,7,9]
101 | _E = fromList [1,2,4,5,6,7,10]
102 |
103 | a = singleEdge 'a'
104 | b = singleEdge 'b'
105 |
106 | dfa336 :: DFA.DFA Char (Set Int)
107 | dfa336 = Automata
108 | { _S = fL [_A, _B, _C, _D, _E]
109 | , _Σ = fL "ab"
110 | , s0 = _A
111 | , _F = fL [_E]
112 | , _Δ = fL
113 | [ (_A, a, _B), (_A, b, _C)
114 | , (_B, a, _B), (_B, b, _D)
115 | , (_C, a, _B), (_C, b, _C)
116 | , (_D, a, _B), (_D, b, _E)
117 | , (_E, a, _B), (_E, b, _C)
118 | ]
119 | }
120 |
121 | nfa2dfa0 =
122 | nfa2dfa nfa334
123 | @?=
124 | dfa336
125 |
126 | nfa334Eps0 =
127 | NFA.epsClosure nfa334
128 |
129 | epsilonNFA =
130 | Automata
131 | { _S = fL [0, 1]
132 | , _Σ = fL ""
133 | , s0 = 0
134 | , _F = fL [1]
135 | , _Δ = fL [ (0, singleEdge NFAEpsilon, 1) ]
136 | }
137 |
138 | regexTest0 =
139 | regex2nfa Epsilon
140 | @?=
141 | epsilonNFA
142 |
143 | regexTest1 =
144 | regex2nfa (Symbol 'a')
145 | @?=
146 | epsilonNFA { _Σ = fL "a", _Δ = fL [ (0, singleEdge $ Edge 'a', 1) ] }
147 |
148 | regexTestUnion =
149 | regex2nfa (Union (Symbol 'a') (Symbol 'b'))
150 | @?= Automata
151 | { _S = fL [0..5]
152 | , _Σ = fL "ab"
153 | , s0 = 4
154 | , _F = fL [5]
155 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1)
156 | , (2, singleEdge $ Edge 'b', 3)
157 | , (4, singleEdge NFAEpsilon, 0)
158 | , (4, singleEdge NFAEpsilon, 2)
159 | , (1, singleEdge NFAEpsilon, 5)
160 | , (3, singleEdge NFAEpsilon, 5) ]
161 | }
162 |
163 | regexTestConcat =
164 | regex2nfa (Concat [Symbol 'a', Symbol 'b'])
165 | @?= Automata
166 | { _S = fL [0..3]
167 | , _Σ = fL "ab"
168 | , s0 = 0
169 | , _F = fL [3]
170 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1)
171 | , (1, singleEdge NFAEpsilon, 2)
172 | , (2, singleEdge $ Edge 'b', 3) ]
173 | }
174 |
175 | regexTestKleene =
176 | regex2nfa (Kleene (Concat [Symbol 'a', Symbol 'b']))
177 | @?= Automata
178 | { _S = fL [0..5]
179 | , _Σ = fL "ab"
180 | , s0 = 4
181 | , _F = fL [5]
182 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1)
183 | , (1, singleEdge NFAEpsilon, 2)
184 | , (2, singleEdge $ Edge 'b', 3)
185 | , (4, singleEdge NFAEpsilon, 0)
186 | , (4, singleEdge NFAEpsilon, 5)
187 | , (3, singleEdge NFAEpsilon, 0)
188 | , (3, singleEdge NFAEpsilon, 5)]
189 | }
190 |
191 | regexTestPosclos =
192 | regex2nfa (PosClos (Concat [Symbol 'a', Symbol 'b']))
193 | @?= Automata
194 | { _S = fL [0..9]
195 | , _Σ = fL "ab"
196 | , s0 = 0
197 | , _F = fL [9]
198 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1)
199 | , (1, singleEdge NFAEpsilon, 2)
200 | , (2, singleEdge $ Edge 'b', 3)
201 | , (3, singleEdge NFAEpsilon, 8)
202 | , (4, singleEdge $ Edge 'a', 5)
203 | , (5, singleEdge NFAEpsilon, 6)
204 | , (6, singleEdge $ Edge 'b', 7)
205 | , (8, singleEdge NFAEpsilon, 4)
206 | , (8, singleEdge NFAEpsilon, 9)
207 | , (7, singleEdge NFAEpsilon, 4)
208 | , (7, singleEdge NFAEpsilon, 9)]
209 | }
210 |
211 | dfaABPlus = regex2dfa (PosClos (Concat [Symbol 'a', Symbol 'b']))
212 | dfaWS = regex2dfa (PosClos (Symbol ' '))
213 |
214 | {-
215 | dfaGetName x
216 | | x == dfaWS = "ws"
217 | | x == dfaABPlus = "ab+"
218 | | otherwise = "Error"
219 | -}
220 |
221 | tokenizeTest0 =
222 | tokenize [("ab+", dfaABPlus), ("ws", dfaWS)] const "abab ab ababab"
223 | @?=
224 | [ Token "ab+" "abab" 4
225 | , Token "ws" " " 1
226 | , Token "ab+" "ab" 2
227 | , Token "ws" " " 1
228 | , Token "ab+" "ababab" 6
229 | , EOF
230 | ]
231 |
232 | {-
233 | dfaID = regex2dfa
234 | (PosClos $ MultiUnion
235 | [ Class ['a' .. 'z']
236 | , Class ['A' .. 'Z']
237 | , Symbol '_'
238 | ])
239 | -}
240 |
241 | dfaID = regex2dfa
242 | (PosClos $ Class $ '_' : ['a' .. 'z'] ++ ['A' .. 'Z'])
243 |
244 | -- For profiling runtime of DFA subset construction (nfa2dfa):
245 | dfaIDTest =
246 | dfaID
247 | @?=
248 | dfaID
249 |
250 | dfaEQ = regex2dfa (Symbol '=')
251 | dfaSEMI = regex2dfa (Symbol ';')
252 |
253 | dfaINT = regex2dfa (PosClos $ Class [ '0' .. '9' ])
254 |
255 | data TermSymbol = T_ID | T_INT | T_WS | T_EQ | T_SEMI
256 | deriving (Eq, Ord, Show)
257 |
258 | data TermValue =
259 | ID String
260 | | INT Int
261 | | WS String
262 | | EQSIGN
263 | | SEMI
264 | deriving (Eq, Ord, Show)
265 |
266 | lexeme2value lexeme T_WS = WS lexeme
267 | lexeme2value lexeme T_ID = ID lexeme
268 | lexeme2value lexeme T_INT = INT $ read lexeme
269 | lexeme2value lexeme T_EQ = EQSIGN
270 | lexeme2value lexeme T_SEMI = SEMI
271 |
272 | tokenizeTest1 =
273 | tokenize
274 | [ (T_WS, dfaWS), (T_ID, dfaID), (T_INT, dfaINT)
275 | , (T_EQ, dfaEQ), (T_SEMI, dfaSEMI) ]
276 | lexeme2value "_matt = 0;"
277 | @?=
278 | [ Token T_ID (ID "_matt") 5
279 | , Token T_WS (WS " ") 1
280 | , Token T_EQ EQSIGN 1
281 | , Token T_WS (WS " ") 1
282 | , Token T_INT (INT 0) 1
283 | , Token T_SEMI SEMI 1
284 | , EOF
285 | ]
286 |
287 | lineCommentDFA = regex2dfa $ Concat [Literal "//", Kleene $ NotClass ['\n'], Symbol '\n']
288 |
289 | lineCommentTest =
290 | tokenize [("LineComment", lineCommentDFA)] const
291 | "// This is a line comment.\n"
292 | @?=
293 | [ Token "LineComment" "// This is a line comment.\n" 27
294 | , EOF
295 | ]
296 |
297 | main :: IO ()
298 | main = defaultMainWithOpts
299 | [ testCase "testValid0" testValid0
300 | , testCase "testClosureWith0" testClosureWith0
301 | , testCase "testClosureWith1" testClosureWith1
302 | , testCase "testClosureWith2" testClosureWith2
303 | , testCase "testClosureWith3" testClosureWith3
304 | , testCase "testMove0" testMove0
305 | , testCase "nfa2dfa0" nfa2dfa0
306 | , testCase "regexTest0" regexTest0
307 | , testCase "regexTest1" regexTest1
308 | , testCase "regexTestUnion" regexTestUnion
309 | , testCase "regexTestConcat" regexTestConcat
310 | , testCase "regexTestKleene" regexTestKleene
311 | , testCase "regexTestPosclos" regexTestPosclos
312 | , testCase "tokenizeTest0" tokenizeTest0
313 | , testCase "dfaIDTest" dfaIDTest
314 | , testCase "tokenizeTest1" tokenizeTest1
315 | , testCase "lineCommentTest" lineCommentTest
316 | ] mempty
317 |
318 |
--------------------------------------------------------------------------------
/test/ll/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FlexibleContexts #-}
2 | module Main where
3 | import Example.Grammar
4 | import Text.ANTLR.Grammar
5 | import Text.ANTLR.Parser
6 | import Text.ANTLR.Pretty
7 | import qualified Data.Text as T
8 | import Text.ANTLR.LL1
9 |
10 | import Text.ANTLR.Set (fromList, union, empty, Set(..))
11 | import qualified Text.ANTLR.Set as Set
12 |
13 | import qualified Data.Map.Strict as M
14 |
15 | import System.IO.Unsafe (unsafePerformIO)
16 | import Data.Monoid
17 | import Test.Framework
18 | import Test.Framework (defaultMainWithOpts)
19 | import Test.Framework.Providers.HUnit
20 | import Test.Framework.Providers.QuickCheck2
21 | import Test.HUnit hiding ((@?=), assertEqual)
22 | import Test.QuickCheck (Property, quickCheck, (==>))
23 | import qualified Test.QuickCheck.Monadic as TQM
24 |
25 | import Text.ANTLR.HUnit
26 |
27 | type LL1NonTerminal = String
28 | type LL1Terminal = String
29 |
30 | uPIO = unsafePerformIO
31 |
32 | grm :: Grammar () LL1NonTerminal LL1Terminal ()
33 | grm = dragonBook428
34 |
35 | termination = first grm [NT "E"] @?= first grm [NT "E"]
36 |
37 | firstF = first grm [NT "F"] @?= fromList [Icon "(", Icon "id"]
38 |
39 | noEps = first grm [NT "E"] @?= fromList [Icon "(", Icon "id"]
40 |
41 | firstT' =
42 | first grm [NT "T'"]
43 | @?=
44 | fromList [Icon "*", IconEps]
45 |
46 | foldEpsTest = foldWhileEpsilon union empty
47 | [ fromList [Icon "(", Icon "id"]
48 | , fromList [Icon ")"]
49 | ]
50 | @?=
51 | fromList [Icon "(", Icon "id"]
52 |
53 | firstAll =
54 | ( Set.map ((\nt -> (nt, first grm [nt])) . NT) (ns grm)
55 | `union`
56 | Set.map ((\t -> (t, first grm [t])) . T) (ts grm)
57 | )
58 | @?=
59 | fromList
60 | [ (NT "E", fromList [Icon "(", Icon "id"])
61 | , (NT "E'", fromList [Icon "+", IconEps])
62 | , (NT "F", fromList [Icon "(", Icon "id"])
63 | , (NT "T", fromList [Icon "(", Icon "id"])
64 | , (NT "T'", fromList [Icon "*", IconEps])
65 | , (T "(", fromList [Icon "("])
66 | , (T ")", fromList [Icon ")"])
67 | , (T "*", fromList [Icon "*"])
68 | , (T "+", fromList [Icon "+"])
69 | , (T "id", fromList [Icon "id"])
70 | ]
71 |
72 | grm' :: Grammar () LL1NonTerminal LL1Terminal ()
73 | grm' = grm
74 |
75 | followAll :: IO ()
76 | followAll = let
77 | fncn :: LL1NonTerminal -> (ProdElem LL1NonTerminal LL1Terminal, Set (Icon LL1Terminal))
78 | fncn nt = (NT nt, follow grm' nt)
79 | in Set.map fncn (ns grm')
80 | @?=
81 | fromList
82 | [ (NT "E", fromList [Icon ")", IconEOF])
83 | , (NT "E'", fromList [Icon ")", IconEOF])
84 | , (NT "T", fromList [Icon ")", Icon "+", IconEOF])
85 | , (NT "T'", fromList [Icon ")", Icon "+", IconEOF])
86 | , (NT "F", fromList [Icon ")", Icon "*", Icon "+", IconEOF])
87 | ]
88 |
89 | parseTableTest =
90 | parseTable grm
91 | @?=
92 | M.fromList (map (\((a,b),c) -> ((a,b), Set.singleton c))
93 | -- Figure 4.17 of dragon book:
94 | [ (("E", Icon "id"), [NT "T", NT "E'"])
95 | , (("E", Icon "("), [NT "T", NT "E'"])
96 | , (("E'", Icon "+"), [T "+", NT "T", NT "E'"])
97 | , (("E'", Icon ")"), [Eps])
98 | , (("E'", IconEOF), [Eps])
99 | , (("T", Icon "id"), [NT "F", NT "T'"])
100 | , (("T", Icon "("), [NT "F", NT "T'"])
101 | , (("T'", Icon "+"), [Eps])
102 | , (("T'", Icon "*"), [T "*", NT "F", NT "T'"])
103 | , (("T'", Icon ")"), [Eps])
104 | , (("T'", IconEOF), [Eps])
105 | , (("F", Icon "id"), [T "id"])
106 | , (("F", Icon "("), [T "(", NT "E", T ")"])
107 | ])
108 |
109 | type LLAST = AST LL1NonTerminal LL1Terminal
110 |
111 | action0 EpsE = LeafEps
112 | action0 (TermE t) = Leaf t
113 | action0 (NonTE (nt, ss, us)) = AST nt ss us
114 |
115 | action1 ::
116 | (Prettify t, Prettify (StripEOF (Sym t)), Prettify nts)
117 | => ParseEvent (AST nts t) nts t -> AST nts t
118 | action1 (NonTE (nt, ss, trees)) = uPIO (putStrLn $ T.unpack $ pshow ("Act:", nt, ss, trees)) `seq` action0 $ NonTE (nt,ss,trees)
119 | action1 (TermE x) = uPIO (putStrLn $ T.unpack $ pshow ("Act:", x)) `seq` action0 $ TermE x
120 | action1 EpsE = action0 EpsE
121 |
122 | dragonPredParse =
123 | predictiveParse grm action0 ["id", "+", "id", "*", "id", ""]
124 | @?=
125 | (Just $ AST "E" [NT "T", NT "E'"]
126 | [ AST "T" [NT "F", NT "T'"]
127 | [ AST "F" [T "id"] [Leaf "id"]
128 | , AST "T'" [Eps] [LeafEps]
129 | ]
130 | , AST "E'" [T "+", NT "T", NT "E'"]
131 | [ Leaf "+"
132 | , AST "T" [NT "F", NT "T'"]
133 | [ AST "F" [T "id"] [Leaf "id"]
134 | , AST "T'" [T "*", NT "F", NT "T'"]
135 | [ Leaf "*"
136 | , AST "F" [T "id"] [Leaf "id"]
137 | , AST "T'" [Eps] [LeafEps]
138 | ]
139 | ]
140 | , AST "E'" [Eps] [LeafEps]
141 | ]
142 | ])
143 |
144 | singleLang :: Grammar () String Char ()
145 | singleLang = (defaultGrammar "S" :: Grammar () String Char ())
146 | { s0 = "S"
147 | , ns = fromList ["S", "X"]
148 | , ts = fromList ['a']
149 | , ps = [ production "S" $ Prod Pass [NT "X", T 'a']
150 | , production "X" $ Prod Pass [Eps]
151 | ]
152 | }
153 |
154 | testRemoveEpsilons =
155 | removeEpsilons singleLang
156 | @?= singleLang
157 | { ps = [ production "S" $ Prod Pass [NT "X", T 'a']
158 | , production "S" $ Prod Pass [T 'a']
159 | ]
160 | }
161 |
162 | singleLang2 :: Grammar () String Char ()
163 | singleLang2 = singleLang
164 | { ts = fromList ['a', 'b']
165 | , ps = [ production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b', NT "X"]
166 | , production "X" $ Prod Pass [Eps]
167 | ]
168 | }
169 |
170 | testRemoveEpsilons2 =
171 | (Set.fromList . ps . removeEpsilons) singleLang2
172 | @?=
173 | fromList
174 | [ production "S" $ Prod Pass [ T 'a', T 'b' ]
175 | , production "S" $ Prod Pass [ T 'a', T 'b', NT "X"]
176 | , production "S" $ Prod Pass [ T 'a', NT "X", T 'b' ]
177 | , production "S" $ Prod Pass [ T 'a', NT "X", T 'b', NT "X"]
178 | , production "S" $ Prod Pass [NT "X", T 'a', T 'b' ]
179 | , production "S" $ Prod Pass [NT "X", T 'a', T 'b', NT "X"]
180 | , production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b' ]
181 | , production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b', NT "X"]
182 | ]
183 |
184 | testRemoveEpsilons3 =
185 | removeEpsilons dragonBook428
186 | @?= (defaultGrammar "E" :: Grammar () String String ())
187 | { ns = fromList ["E", "E'", "T", "T'", "F"]
188 | , ts = fromList ["+", "*", "(", ")", "id"]
189 | , s0 = "E"
190 | , ps = [ production "E" $ Prod Pass [NT "T", NT "E'"]
191 | , production "E'" $ Prod Pass [T "+", NT "T", NT "E'"]
192 | , production "E'" $ Prod Pass [Eps] -- Implicitly epsilon
193 | , production "T" $ Prod Pass [NT "F", NT "T'"]
194 | , production "T'" $ Prod Pass [T "*", NT "F", NT "T'"]
195 | , production "T'" $ Prod Pass [Eps]
196 | , production "F" $ Prod Pass [T "(", NT "E", T ")"]
197 | , production "F" $ Prod Pass [T "id"]
198 | ]
199 | }
200 |
201 | leftGrammar0 :: Grammar () Char Char ()
202 | leftGrammar0 = (defaultGrammar 'S' :: Grammar () Char Char ())
203 | { ns = fromList "SABC"
204 | , ts = fromList "defg"
205 | , s0 = 'S'
206 | , ps = [ production 'S' $ Prod Pass [NT 'A']
207 | , production 'A' $ Prod Pass [T 'd', T 'e', NT 'B']
208 | , production 'A' $ Prod Pass [T 'd', T 'e', NT 'C']
209 | , production 'B' $ Prod Pass [T 'f']
210 | , production 'C' $ Prod Pass [T 'g']
211 | ]
212 | }
213 |
214 | testLeftFactor =
215 | leftFactor leftGrammar0
216 | @?= G
217 | { ns = fromList $ map Prime [('S', 0), ('A', 0), ('B', 0), ('C', 0)]
218 | , ts = fromList "defg"
219 | , s0 = Prime ('S', 0)
220 | , ps = [ production (Prime ('S', 0)) $ Prod Pass [NT $ Prime ('A', 0)]
221 | , production (Prime ('A', 0)) $ Prod Pass [T 'd', T 'e', NT $ Prime ('A', 1)]
222 | , production (Prime ('A', 1)) $ Prod Pass [NT $ Prime ('B', 0)]
223 | , production (Prime ('A', 1)) $ Prod Pass [NT $ Prime ('C', 0)]
224 | , production (Prime ('B', 0)) $ Prod Pass [T 'f']
225 | , production (Prime ('C', 0)) $ Prod Pass [T 'g']
226 | ]
227 | , _πs = fromList []
228 | , _μs = fromList []
229 | }
230 |
231 | main :: IO ()
232 | main = defaultMainWithOpts
233 | [ testCase "fold_epsilon" foldEpsTest
234 | , testCase "termination" termination
235 | , testCase "no_epsilon" noEps
236 | , testCase "firstF" firstF
237 | , testCase "firstT'" firstT'
238 | , testCase "firstAll" firstAll
239 | , testCase "followAll" followAll
240 | , testCase "dragonHasAllNonTerms" $ hasAllNonTerms grm @?= True
241 | , testCase "dragonHasAllTerms" $ hasAllTerms grm @?= True
242 | , testCase "dragonStartIsNonTerm" $ startIsNonTerm grm @?= True
243 | , testCase "dragonIsValid" $ validGrammar grm @?= True
244 | , testCase "dragonIsLL1" $ isLL1 grm @?= True
245 | , testCase "dragonParseTable" parseTableTest
246 | , testCase "dragonPredParse" dragonPredParse
247 | , testCase "testRemoveEpsilons" testRemoveEpsilons
248 | , testCase "testRemoveEpsilons2" testRemoveEpsilons2
249 | , testCase "testLeftFactor" testLeftFactor
250 | ] mempty
251 |
252 |
--------------------------------------------------------------------------------
/test/lr/EOF.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module EOF where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import Language.ANTLR4.Syntax (stripQuotesReadEscape)
19 |
20 | import EOFGrammar
21 |
22 | $(g4_parsers eOFAST eOFGrammar)
23 |
24 | test_eof = case glrParse (== T_WS) "anything that is not a comma" of
25 | (ResultAccept ast) -> ast2words ast @?= ["anything", "that", "is", "not", "a", "comma"]
26 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs
27 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\""
28 |
29 |
--------------------------------------------------------------------------------
/test/lr/EOFGrammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module EOFGrammar where
6 | import Language.ANTLR4
7 |
8 | data Plus = Plus String String | Minus String String
9 | deriving (Eq, Show)
10 |
11 | [g4|
12 | grammar EOF;
13 |
14 | words : word* ;
15 |
16 | word : WORD ;
17 |
18 | WORD : (~ [, ])* -> String;
19 |
20 | WS : [ \t\n\r\f\v]+ -> String;
21 | |]
22 |
23 |
--------------------------------------------------------------------------------
/test/lr/GLRInc.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module GLRInc where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import Language.ANTLR4.Syntax (stripQuotesReadEscape)
19 |
20 | import GLRIncGrammar
21 |
22 | $(g4_parsers gLRIncAST gLRIncGrammar)
23 |
24 | test_GLRInc = case glrParse (== T_WS) "word - foo" of
25 | (ResultAccept ast) -> ast2plus ast @?= Minus "word" "foo"
26 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs
27 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\""
28 |
29 |
--------------------------------------------------------------------------------
/test/lr/GLRIncGrammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module GLRIncGrammar where
6 | import Language.ANTLR4
7 |
8 | data Plus = Plus String String | Minus String String
9 | deriving (Eq, Show)
10 |
11 | [g4|
12 | grammar GLRInc;
13 |
14 | plus : LowerID '+' Prim -> Plus
15 | | Prim '-' LowerID -> Minus
16 | ;
17 |
18 | LowerID : [a-z][a-zA-Z0-9_]* -> String;
19 | Prim : 'word' -> String;
20 |
21 | WS : [ \t\n\r\f\v]+ -> String;
22 | |]
23 |
24 |
--------------------------------------------------------------------------------
/test/lr/GLRPartial.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module GLRPartial where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 | import Language.ANTLR4.Syntax (stripQuotesReadEscape)
20 |
21 | import GLRPartialGrammar
22 |
23 | $(g4_parsers gLRPartialAST gLRPartialGrammar)
24 |
25 | test_GLRPartial = case glrParse (== T_WS) "word\nword\nword\nw0rd" of
26 | (ResultAccept ast) -> assertFailure $ "Was not suppose to parse: " ++ pshow' (ast2words ast)
27 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs
28 | (ErrorNoAction cfg asts _) -> return () -- Correct, should not have parsed
29 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\""
30 |
31 |
--------------------------------------------------------------------------------
/test/lr/GLRPartialGrammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module GLRPartialGrammar where
6 | import Language.ANTLR4
7 |
8 | [g4|
9 | grammar GLRPartial;
10 |
11 | words : word+ ;
12 |
13 | word : Prim ;
14 |
15 | LowerID : [a-z][a-zA-Z0-9_]* -> String;
16 | Prim : 'word' -> String;
17 |
18 | WS : [ \t\n\r\f\v]+ -> String;
19 | |]
20 |
21 |
--------------------------------------------------------------------------------
/test/sexpression/Grammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-}
4 | module Grammar where
5 | import Language.ANTLR4
6 |
7 | {-
8 | The MIT License
9 | Copyright (c) 2008 Robert Stehwien
10 | Permission is hereby granted, free of charge, to any person obtaining a copy
11 | of this software and associated documentation files (the "Software"), to deal
12 | in the Software without restriction, including without limitation the rights
13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | copies of the Software, and to permit persons to whom the Software is
15 | furnished to do so, subject to the following conditions:
16 | The above copyright notice and this permission notice shall be included in
17 | all copies or substantial portions of the Software.
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 |
26 | Port to Antlr4 by Tom Everett
27 | Subsequent port to antlr-haskell by Karl Cronburg
28 | -}
29 |
30 | data Atom
31 | = Str String
32 | | Symb String
33 | | Number Double
34 | deriving (Eq, Ord, Show)
35 |
36 | data Item
37 | = Atm Atom
38 | | List [Item]
39 | | Field Item Item
40 | deriving (Eq, Ord, Show)
41 |
42 | [g4|
43 | grammar Sexpression;
44 |
45 | sexpr
46 | : item*
47 | ;
48 |
49 | item
50 | : atom -> ${\a -> Atm a}
51 | | list -> List
52 | | '(' item '.' item ')' -> Field
53 | ;
54 |
55 | list
56 | : '(' item* ')'
57 | ;
58 |
59 | atom
60 | : STRING -> Str
61 | | SYMBOL -> Symb
62 | | NUMBER -> Number
63 | ;
64 |
65 | STRING : ["] ( ('\\' .) | ~ ["\\] )* ["] -> String;
66 |
67 | WHITESPACE : [ \n\t\r]+ -> String;
68 |
69 | NUMBER : [-+]? DIGIT+ ([\.] DIGIT+)? -> Double;
70 |
71 | SYMBOL : SYMBOL_START (SYMBOL_START | DIGIT)* -> String;
72 |
73 | fragment SYMBOL_START : [a-zA-Z+\-*/] ;
74 |
75 | fragment DIGIT : [0-9] ;
76 |
77 | |]
78 |
79 | isWS T_WHITESPACE = True
80 | isWS _ = False
81 |
82 |
--------------------------------------------------------------------------------
/test/sexpression/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-}
4 | module Parser
5 | ( module Grammar
6 | , glrParse, ast2sexpr
7 | -- , glrParseFast
8 | ) where
9 | import Language.ANTLR4
10 | import Grammar
11 |
12 | --import qualified GHC.Types as G
13 | import qualified Text.ANTLR.LR as LR
14 |
15 | $(g4_parsers sexpressionAST sexpressionGrammar)
16 |
17 | -- $(mkLRParser the_ast sexpressionGrammar)
18 |
19 |
--------------------------------------------------------------------------------
/test/sexpression/sexpression.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 | import Language.ANTLR4
3 | import Parser
4 | import qualified Text.ANTLR.Set as S
5 |
6 | getAST (ResultAccept ast) = ast
7 | getAST _ = error "non-AST in ResultSet"
8 |
9 | main =
10 | case glrParse isWS "((m1lk ju1ce 3.1) . (h0ney marmalade \"jam\"))" of
11 | (ResultAccept ast) -> print $ ast2sexpr ast
12 | (ResultSet xs) -> mapM_ (print . ast2sexpr . getAST) (S.toList xs)
13 |
14 |
--------------------------------------------------------------------------------
/test/shared-hunit/Text/ANTLR/HUnit.hs:
--------------------------------------------------------------------------------
1 | module Text.ANTLR.HUnit where
2 | import Control.DeepSeq
3 | import Control.Exception as E
4 | import Control.Monad
5 | import Data.List
6 | import Data.Typeable
7 | import Data.CallStack
8 | import Test.HUnit.Lang hiding (assertEqual, (@?=), (@=?))
9 |
10 | import Text.ANTLR.Pretty
11 | import qualified Data.Text as T
12 |
13 | location :: HasCallStack => Maybe SrcLoc
14 | location = case reverse callStack of
15 | (_, loc) : _ -> Just loc
16 | [] -> Nothing
17 |
18 | -- | Asserts that the specified actual value is equal to the expected value.
19 | -- The output message will contain the prefix, the expected value, and the
20 | -- actual value.
21 | --
22 | -- If the prefix is the empty string (i.e., @\"\"@), then the prefix is omitted
23 | -- and only the expected and actual values are output.
24 | assertEqual :: (HasCallStack, Eq a, Prettify a)
25 | => String -- ^ The message prefix
26 | -> a -- ^ The expected value
27 | -> a -- ^ The actual value
28 | -> Assertion
29 | assertEqual preface expected actual =
30 | unless (actual == expected) $ do
31 | (prefaceMsg `deepseq` expectedMsg `deepseq` actualMsg `deepseq` E.throwIO (HUnitFailure location $ ExpectedButGot prefaceMsg expectedMsg actualMsg))
32 | where
33 | prefaceMsg
34 | | null preface = Nothing
35 | | otherwise = Just preface
36 | expectedMsg = '\n' : T.unpack (pshowIndent 4 expected)
37 | actualMsg = '\n' : T.unpack (pshowIndent 4 actual)
38 |
39 | -- | Asserts that the specified actual value is equal to the expected value
40 | -- (with the actual value on the left-hand side).
41 | (@?=) :: (HasCallStack, Eq a, Prettify a)
42 | => a -- ^ The actual value
43 | -> a -- ^ The expected value
44 | -> Assertion
45 | actual @?= expected = assertEqual "" expected actual
46 |
47 | (@=?) a b = (@?=) b a
48 |
49 |
--------------------------------------------------------------------------------
/test/shared/Example/Grammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ExplicitForAll, DeriveAnyClass, DeriveGeneric, TypeFamilies
2 | , DeriveDataTypeable #-}
3 | module Example.Grammar where
4 | import Text.ANTLR.Set (fromList, member, (\\), empty, Generic(..), Hashable(..))
5 | import Text.ANTLR.Grammar
6 | import Text.ANTLR.Pretty
7 | import Data.Data (toConstr, Data(..))
8 |
9 | data NS0 = A | B | C deriving (Eq, Ord, Generic, Hashable, Bounded, Enum, Show, Data)
10 | data TS0 = A_ | B_ | C_ deriving (Eq, Ord, Generic, Hashable, Bounded, Enum, Show, Data)
11 | a = A_
12 | b = B_
13 | c = C_
14 |
15 | -- TODO: boilerplate identity type classes for bounded enums
16 | instance Ref NS0 where
17 | type Sym NS0 = NS0
18 | getSymbol = id
19 | instance Ref TS0 where
20 | type Sym TS0 = TS0
21 | getSymbol = id
22 | instance Prettify NS0 where prettify = rshow . toConstr
23 | instance Prettify TS0 where prettify = rshow . toConstr
24 | dG :: Grammar () NS0 TS0 ()
25 | dG = defaultGrammar C
26 |
27 | production :: nts -> (ProdRHS s nts ts) -> Production s nts ts dt
28 | production lhs rhs = Production lhs rhs Nothing
29 |
30 | mattToolG :: Grammar () NS0 TS0 ()
31 | mattToolG = dG
32 | { ns = fromList [A, B, C]
33 | , ts = fromList [a, b, c]
34 | , s0 = C
35 | , ps =
36 | [ production A $ Prod Pass [T a, T b]
37 | , production A $ Prod Pass [T a]
38 | , production B $ Prod Pass [NT A, T b]
39 | , production B $ Prod Pass [T b]
40 | , production C $ Prod Pass [NT A, NT B, NT C]
41 | ]
42 | }
43 |
44 | dG' :: Grammar () String String ()
45 | dG' = defaultGrammar "A"
46 |
47 | dragonBook428 :: Grammar () String String ()
48 | dragonBook428 = dG'
49 | { ns = fromList ["E", "E'", "T", "T'", "F"]
50 | , ts = fromList ["+", "*", "(", ")", "id"]
51 | , s0 = "E"
52 | , ps = [ production "E" $ Prod Pass [NT "T", NT "E'"]
53 | , production "E'" $ Prod Pass [T "+", NT "T", NT "E'"]
54 | , production "E'" $ Prod Pass [Eps] -- Implicitly epsilon
55 | , production "T" $ Prod Pass [NT "F", NT "T'"]
56 | , production "T'" $ Prod Pass [T "*", NT "F", NT "T'"]
57 | , production "T'" $ Prod Pass [Eps]
58 | , production "F" $ Prod Pass [T "(", NT "E", T ")"]
59 | , production "F" $ Prod Pass [T "id"]
60 | ]
61 | }
62 |
63 | dragonBook41 :: Grammar () String String ()
64 | dragonBook41 = dG'
65 | { ns = fromList ["E'", "E", "T", "F"]
66 | , ts = fromList ["+", "*", "(", ")", "id"]
67 | , s0 = "E"
68 | , ps = [ production "E" $ Prod Pass [NT "E", T "+", NT "T"]
69 | , production "E" $ Prod Pass [NT "T"]
70 | , production "T" $ Prod Pass [NT "T", T "*", NT "F"]
71 | , production "T" $ Prod Pass [NT "F"]
72 | , production "F" $ Prod Pass [T "(", NT "E", T ")"]
73 | , production "F" $ Prod Pass [T "id"]
74 | ]
75 | }
76 |
77 | dragonBook455 :: Grammar () String String ()
78 | dragonBook455 = dG'
79 | { ns = fromList ["S", "C"]
80 | , ts = fromList ["c", "d"]
81 | , s0 = "S"
82 | , ps = [ production "S" $ Prod Pass [NT "C", NT "C"]
83 | , production "C" $ Prod Pass [T "c", NT "C"]
84 | , production "C" $ Prod Pass [T "d"]
85 | ]
86 | }
87 |
88 | dumbGrammar :: Grammar () String String ()
89 | dumbGrammar = dG'
90 | { ns = fromList ["S", "A", "B", "I", "D"]
91 | , ts = fromList ["1","2","3","+","-","*"]
92 | , s0 = "S"
93 | , ps = [ production "S" $ Prod Pass [NT "A"]
94 | , production "S" $ Prod Pass [NT "B"]
95 | , production "S" $ Prod Pass [NT "D"]
96 | , production "A" $ Prod Pass [NT "I", T "+", NT "I"]
97 | , production "B" $ Prod Pass [NT "I", T "-", NT "I"]
98 | , production "I" $ Prod Pass [T "1"]
99 | , production "I" $ Prod Pass [T "2"]
100 | , production "I" $ Prod Pass [T "3"]
101 | , production "D" $ Prod Pass [NT "I", T "*", NT "I"]
102 | ]
103 | --, us = [(\_ -> True)]
104 | }
105 |
106 |
--------------------------------------------------------------------------------
/test/simple/Grammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Grammar where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 |
19 | data Attr = A | B
20 |
21 | data Decl = Foo | Bar
22 |
23 | [g4|
24 | grammar Simple;
25 |
26 | attrDecl : attr* decl ;
27 |
28 | attrDecl2 : attr? decl ;
29 |
30 | attrDecl3 : attr+ decl ;
31 |
32 | attr : 'a' ';' -> A
33 | | 'b' ';' -> B
34 | ;
35 |
36 | decl : 'foo' -> Foo
37 | | 'bar' -> Bar
38 | ;
39 |
40 | UNICODE : '\u0008' -> String ;
41 |
42 | |]
43 |
44 |
--------------------------------------------------------------------------------
/test/simple/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Main where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 |
20 | import Grammar
21 |
22 | --foo = [ $(lift $ LR.lr1Table simpleGrammar) ]
23 |
24 | --test_star = foo @?= []
25 | test_star = () @?= ()
26 |
27 | main :: IO ()
28 | main = defaultMainWithOpts
29 | [ testCase "test_star" test_star
30 | ] mempty
31 |
32 |
--------------------------------------------------------------------------------
/test/swift/Parser.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-}
4 | module Parser
5 | ( module Grammar
6 | , glrParse, ast2sexpr
7 | ) where
8 | import Language.ANTLR4
9 | import Grammar
10 |
11 | import qualified Text.ANTLR.LR as LR
12 |
13 | $(g4_parsers swiftAST swiftGrammar)
14 | -- $(mkLRParser the_ast sexpressionGrammar)
15 |
16 |
--------------------------------------------------------------------------------
/test/swift/swift.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 | import Language.ANTLR4
3 | import Parser
4 | import qualified Text.ANTLR.Set as S
5 |
6 | getAST (ResultAccept ast) = ast
7 | getAST _ = error "non-AST in ResultSet"
8 |
9 | main =
10 | case glrParse isWS "var i = 0;" of
11 | (ResultAccept ast) -> print $ ast2topLevel ast
12 | (ResultSet xs) -> mapM_ (print . ast2topLevel . getAST) (S.toList xs)
13 |
14 |
--------------------------------------------------------------------------------
/test/template/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 |
3 | import System.IO.Unsafe (unsafePerformIO)
4 | import Data.Monoid
5 | import Test.Framework
6 | import Test.Framework.Providers.HUnit
7 | import Test.Framework.Providers.QuickCheck2
8 | import Test.HUnit
9 | import Test.QuickCheck (Property, quickCheck, (==>))
10 | import qualified Test.QuickCheck.Monadic as TQM
11 |
12 | main :: IO ()
13 | main = defaultMainWithOpts
14 | [
15 | ] mempty
16 |
17 |
--------------------------------------------------------------------------------
/test/unit/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Main where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 |
20 | import PlusBug0
21 | $(g4_parsers plusBug0Grammar plusBug0AST)
22 |
23 | test_plusBug0 = case glrParse (== T_WS) "foo bar baz" of
24 | (ResultAccept ast) -> ast2plus ast @?= Plus [ "foo", "bar", "baz" ]
25 | _ -> assertFailure "Ambiguous parse"
26 |
27 | main :: IO ()
28 | main = defaultMainWithOpts
29 | [ testCase "test_plusBug0" test_plusBug0
30 | ] mempty
31 |
32 |
--------------------------------------------------------------------------------
/test/unit/PlusBug0.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module PlusBug0 where
6 | import Language.ANTLR4
7 |
8 | data Plus = Plus [String] | NotPlus [String]
9 | deriving (Eq, Show)
10 |
11 | [g4|
12 | grammar PlusBug0;
13 |
14 | plus : LowerID+ -> Plus ;
15 |
16 | LowerID : [a-z][a-zA-Z0-9_]* -> String;
17 | WS : [ \t\n\r\f\v]+ -> String;
18 | |]
19 |
20 |
--------------------------------------------------------------------------------
/test/unit0/DupTerms.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module DupTerms where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 |
20 | import DupTermsGrammar
21 |
22 | $(g4_parsers dupTermsAST dupTermsGrammar)
23 |
24 | test_dup_terms = case glrParse (== T_WS) "(" of
25 | (ResultAccept ast) -> ast2justParen ast @?= 3
26 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest
27 |
28 |
--------------------------------------------------------------------------------
/test/unit0/DupTermsGrammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module DupTermsGrammar where
6 | import Language.ANTLR4
7 |
8 | doesThisFire :: Int
9 | doesThisFire = 3
10 |
11 | orDoesThisFire :: String -> Int
12 | orDoesThisFire _ = 4
13 |
14 | $( return [] )
15 |
16 | [g4|
17 | grammar DupTerms;
18 |
19 | justParen : '(' -> doesThisFire ;
20 |
21 | LParen : '(' -> orDoesThisFire ;
22 |
23 | WS : [ \t\n\r\f\v]+ -> String;
24 | |]
25 |
26 |
--------------------------------------------------------------------------------
/test/unit0/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Main where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 |
20 | import Star0
21 | import Star1
22 | import DupTerms
23 |
24 | main :: IO ()
25 | main = defaultMainWithOpts
26 | [ testCase "test_star0" test_star0
27 | , testCase "test_star1" test_star1
28 | , testCase "duplicate_terminals" test_dup_terms
29 | ] mempty
30 |
31 |
--------------------------------------------------------------------------------
/test/unit0/Star0.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Star0 where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 |
20 | import Star0Grammar
21 |
22 | $(g4_parsers star0AST star0Grammar)
23 |
24 | test_star0 = case glrParse (== T_WS) "page page" of
25 | (ResultAccept ast) -> ast2words ast @?= ["page", "page"]
26 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest
27 |
28 |
--------------------------------------------------------------------------------
/test/unit0/Star0Grammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Star0Grammar where
6 | import Language.ANTLR4
7 |
8 | [g4|
9 | grammar Star0;
10 |
11 | words : page* -> ${\ps -> ps} ;
12 |
13 | page : Page ;
14 |
15 | Page : 'page' -> String ;
16 |
17 | WS : [ \t\n\r\f\v]+ -> String;
18 | |]
19 |
20 |
--------------------------------------------------------------------------------
/test/unit0/Star1.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Star1 where
6 | import Language.ANTLR4
7 |
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import Data.Monoid
10 | import Test.Framework
11 | import Test.Framework.Providers.HUnit
12 | import Test.Framework.Providers.QuickCheck2
13 | import Test.HUnit
14 | import Test.QuickCheck (Property, quickCheck, (==>))
15 | import qualified Test.QuickCheck.Monadic as TQM
16 |
17 | import Language.Haskell.TH.Syntax (lift)
18 | import qualified Text.ANTLR.LR as LR
19 | import Debug.Trace as D
20 |
21 | import Star1Grammar
22 |
23 | $(g4_parsers star1AST star1Grammar)
24 |
25 | test_star1 = D.trace (pshow' star1Grammar) $
26 | case glrParse (== T_WS) "me page you { byte, byte }" of
27 | (ResultAccept ast) -> ast2words ast @?= Frst (Yep ("me", "page")) [] [Byte, Byte]
28 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest
29 |
30 |
--------------------------------------------------------------------------------
/test/unit0/Star1Grammar.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell
4 | , DeriveDataTypeable #-}
5 | module Star1Grammar where
6 | import Language.ANTLR4
7 |
8 | data MyMaybe x = Nope | Yep x
9 | deriving (Eq, Ord, Show)
10 |
11 | data Mem = Byte
12 | deriving (Eq, Ord, Show)
13 |
14 | data Words =
15 | Frst (MyMaybe (String, String)) [String] [Mem]
16 | | Snd
17 | | Thrd (MyMaybe (String, String))
18 | deriving (Eq, Ord, Show)
19 |
20 | [g4|
21 | grammar Star1;
22 |
23 | words : me 'you' page* '{' bytes '}' -> Frst
24 | | me -> Thrd
25 | | 'woops' -> Snd
26 | ;
27 |
28 | bytes : 'byte' -> ${ [Byte] }
29 | | bytes ',' 'byte' -> ${\bs -> Byte : bs}
30 | ;
31 |
32 | me : me2? -> ${\m -> case m of Nothing -> Nope ; Just x -> Yep x};
33 | me2 : me3 ;
34 | me3 : Me page ;
35 | Me : 'me' -> String ;
36 |
37 | page : Page ;
38 | Page : 'page' -> String ;
39 |
40 | WS : [ \t\n\r\f\v]+ -> String;
41 | |]
42 |
43 |
--------------------------------------------------------------------------------